Diffstat (limited to 'lib')
123 files changed, 3866 insertions, 7693 deletions
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index b2c27d1..a84dafb 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -17,24 +17,30 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CallSite.h" using namespace llvm; -/// As its comment mentions, PointerMayBeCaptured can be expensive. -/// However, it's not easy for BasicAA to cache the result, because -/// it's an ImmutablePass. To work around this, bound queries at a -/// fixed number of uses. -/// -/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep -/// a cache. Then we can move the code from BasicAliasAnalysis into -/// that path, and remove this threshold. -static int const Threshold = 20; +namespace { + struct SimpleCaptureTracker { + explicit SimpleCaptureTracker(bool ReturnCaptures) + : ReturnCaptures(ReturnCaptures), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { return true; } + + bool captured(Instruction *I) { + if (isa<ReturnInst>(I) && !ReturnCaptures) + return false; + + Captured = true; + return true; + } + + bool ReturnCaptures; + + bool Captured; + }; +} /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -45,104 +51,13 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { - assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); - SmallVector<Use*, Threshold> Worklist; - SmallSet<Use*, Threshold> Visited; - int Count = 0; - - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - // If there are lots of uses, conservatively say that the value - // is captured to avoid taking too much compile time. - if (Count++ >= Threshold) - return true; - - Use *U = &UI.getUse(); - Visited.insert(U); - Worklist.push_back(U); - } - - while (!Worklist.empty()) { - Use *U = Worklist.pop_back_val(); - Instruction *I = cast<Instruction>(U->getUser()); - V = U->get(); - - switch (I->getOpcode()) { - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(I); - // Not captured if the callee is readonly, doesn't return a copy through - // its return value and doesn't unwind (a readonly function can leak bits - // by throwing an exception or not depending on the input value). - if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) - break; - - // Not captured if only passed via 'nocapture' arguments. Note that - // calling a function pointer does not in itself cause the pointer to - // be captured. This is a subtle point considering that (for example) - // the callee might return its own address. It is analogous to saying - // that loading a value from a pointer does not cause the pointer to be - // captured, even though the loaded value might be the pointer itself - // (think of self-referential objects). 
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) - // The parameter is not marked 'nocapture' - captured. - return true; - // Only passed via 'nocapture' arguments, or is the called function - not - // captured. - break; - } - case Instruction::Load: - // Loading from a pointer does not cause it to be captured. - break; - case Instruction::VAArg: - // "va-arg" from a pointer does not cause it to be captured. - break; - case Instruction::Ret: - if (ReturnCaptures) - return true; - break; - case Instruction::Store: - if (V == I->getOperand(0)) - // Stored the pointer - conservatively assume it may be captured. - // TODO: If StoreCaptures is not true, we could do Fancy analysis - // to determine whether this store is not actually an escape point. - // In that case, BasicAliasAnalysis should be updated as well to - // take advantage of this. - return true; - // Storing to the pointee does not cause the pointer to be captured. - break; - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::PHI: - case Instruction::Select: - // The original value is not captured via this if the new value isn't. - for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Use *U = &UI.getUse(); - if (Visited.insert(U)) - Worklist.push_back(U); - } - break; - case Instruction::ICmp: - // Don't count comparisons of a no-alias return value against null as - // captures. This allows us to ignore comparisons of malloc results - // with null, for example. - if (isNoAliasCall(V->stripPointerCasts())) - if (ConstantPointerNull *CPN = - dyn_cast<ConstantPointerNull>(I->getOperand(1))) - if (CPN->getType()->getAddressSpace() == 0) - break; - // Otherwise, be conservative. There are crazy ways to capture pointers - // using comparisons. - return true; - default: - // Something else - be conservative and say it is captured. - return true; - } - } - - // All uses examined - not captured. - return false; + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + (void)StoreCaptures; + + SimpleCaptureTracker SCT(ReturnCaptures); + PointerMayBeCaptured(V, SCT); + return SCT.Captured; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 92967c0..323c84f 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + DT = getAnalysisIfAvailable<DominatorTree>(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -331,6 +333,82 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; } +namespace { + /// Only find pointer captures which happen before the given instruction. 
Uses + /// the dominator tree to determine whether one instruction is before another. + struct CapturesBefore { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast<Instruction>(U->getUser()); + if (BeforeHere != I && DT->dominates(BeforeHere, I)) + return false; + return true; + } + + bool captured(Instruction *I) { + if (BeforeHere != I && DT->dominates(BeforeHere, I)) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +AliasAnalysis::ModRefResult +MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, + const AliasAnalysis::Location &MemLoc) { + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + if (MR != AliasAnalysis::ModRef) return MR; + + // FIXME: this is really just shoring-up a deficiency in alias analysis. + // BasicAA isn't willing to spend linear time determining whether an alloca + // was captured before or after this particular call, while we are. However, + // with a smarter AA in place, this test is just wasting compile time. + if (!DT) return AliasAnalysis::ModRef; + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa<GlobalVariable>(Object)) + return AliasAnalysis::ModRef; + ImmutableCallSite CS(Inst); + if (!CS.getInstruction()) return AliasAnalysis::ModRef; + + CapturesBefore CB(Inst, DT); + llvm::PointerMayBeCaptured(Object, CB); + + if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && + !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!AA->isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) { + return AliasAnalysis::ModRef; + } + } + return AliasAnalysis::NoModRef; +} + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases @@ -478,7 +556,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - switch (AA->getModRefInfo(Inst, MemLoc)) { + switch (getModRefInfo(Inst, MemLoc)) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b940d93..ac00259 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4853,10 +4853,18 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Also evaluate the other PHI nodes. 
However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. + SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; for (DenseMap<Instruction *, Constant *>::const_iterator I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ PHINode *PHI = dyn_cast<PHINode>(I->first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + } + // We use two distinct loops because EvaluateExpression may invalidate any + // iterators into CurrentIterVals. + for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator + I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = I->first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); @@ -4928,10 +4936,20 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // Update all the PHI nodes for the next iteration. DenseMap<Instruction *, Constant *> NextIterVals; + + // Create a list of which PHIs we need to compute. We want to do this before + // calling EvaluateExpression on them because that may invalidate iterators + // into CurrentIterVals. + SmallVector<PHINode *, 8> PHIsToCompute; for (DenseMap<Instruction *, Constant *>::const_iterator I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ PHINode *PHI = dyn_cast<PHINode>(I->first); if (!PHI || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(PHI); + } + for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(), + E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = *I; Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 1e51709..159c096 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -19,6 +19,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -1095,7 +1096,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addToContextOwner(VariableDIE, GVContext); // Add location. bool addToAccelTable = false; - DIE *VariableSpecDIE; + DIE *VariableSpecDIE = NULL; if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 581f04b..dc46a58 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -772,6 +772,13 @@ void DwarfDebug::endModule() { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. 
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 1bbe7a0..c8d4dcf 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -70,7 +70,6 @@ add_llvm_library(LLVMCodeGen RegAllocBasic.cpp RegAllocFast.cpp RegAllocGreedy.cpp - RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterClassInfo.cpp RegisterCoalescer.cpp @@ -98,7 +97,6 @@ add_llvm_library(LLVMCodeGen TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp - VirtRegRewriter.cpp ) add_llvm_library_dependencies(LLVMCodeGen diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 899baad..3112c22 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -42,7 +42,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePeepholeOptimizerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); - initializeRALinScanPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index fc0b612..050edce 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -45,7 +45,7 @@ using namespace llvm; /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple +/// keep track of the fact that the register is now available in multiple /// domains. namespace { struct DomainValue { @@ -57,9 +57,6 @@ struct DomainValue { // domains where the register is available for free. unsigned AvailableDomains; - // Position of the last defining instruction. - unsigned Dist; - // Pointer to the next DomainValue in a chain. When two DomainValues are // merged, Victim.Next is set to point to Victor, so old DomainValue // references can be updated by folowing the chain. @@ -101,7 +98,7 @@ struct DomainValue { // Clear this DomainValue and point to next which has all its data. void clear() { - AvailableDomains = Dist = 0; + AvailableDomains = 0; Next = 0; Instrs.clear(); } @@ -109,6 +106,21 @@ struct DomainValue { } namespace { +/// LiveReg - Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonynous namespace + +namespace { class ExeDepsFix : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator<DomainValue> Allocator; @@ -120,10 +132,17 @@ class ExeDepsFix : public MachineFunctionPass { const TargetRegisterInfo *TRI; std::vector<int> AliasMap; const unsigned NumRegs; - DomainValue **LiveRegs; - typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; + LiveReg *LiveRegs; + typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; - unsigned Distance; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. 
+ bool SeenUnknownBackEdge; public: ExeDepsFix(const TargetRegisterClass *rc) @@ -160,10 +179,10 @@ private: void collapse(DomainValue *dv, unsigned domain); bool merge(DomainValue *A, DomainValue *B); - bool enterBasicBlock(MachineBasicBlock*); + void enterBasicBlock(MachineBasicBlock*); void leaveBasicBlock(MachineBasicBlock*); void visitInstr(MachineInstr*); - void visitGenericInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; @@ -182,7 +201,6 @@ DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); - dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); assert(dv->Refs == 0 && "Reference count wasn't cleared"); @@ -231,32 +249,31 @@ DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { /// Set LiveRegs[rx] = dv, updating reference counts. void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } + assert(LiveRegs && "Must enter basic block first."); - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) return; - if (LiveRegs[rx]) - release(LiveRegs[rx]); - LiveRegs[rx] = retain(dv); + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; - release(LiveRegs[rx]); - LiveRegs[rx] = 0; + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = 0; } /// Force register rx into domain. void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) @@ -265,8 +282,8 @@ void ExeDepsFix::force(int rx, unsigned domain) { // This is an incompatible open DomainValue. Collapse it to whatever and // force the new value into domain. This costs a domain crossing. collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); } } else { // Set up basic collapsed DomainValue. @@ -287,7 +304,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { // If there are multiple users, give them new, unique DomainValues. if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) setLiveReg(rx, alloc(domain)); } @@ -303,7 +320,6 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { if (!common) return false; A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); // Clear the old DomainValue so we won't try to swizzle instructions twice. 
@@ -312,66 +328,103 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { B->Next = retain(A); for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) + if (LiveRegs[rx].Value == B) setLiveReg(rx, A); return true; } // enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. -// Return true if some predecessor hasn't been processed yet (like on a loop -// back-edge). -bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. - bool seenBackEdge = false; + SeenUnknownBackEdge = false; - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = 0; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. + if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = regIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) { - seenBackEdge = true; + int rx = regIndex(*i); + if (rx < 0) continue; - } - if (!fi->second) + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) continue; - DomainValue *pdv = resolve(fi->second[rx]); - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { + if (!LiveRegs[rx].Value) { setLiveReg(rx, pdv); continue; } // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { + if (LiveRegs[rx].Value->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - collapse(pdv, domain); + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx].Value, pdv); else force(rx, pdv->getFirstDomain()); } } - return seenBackEdge; + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? 
": incomplete\n" : ": all preds known\n")); } void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); // Save live registers at end of MBB - used by enterBasicBlock(). // Also use LiveOuts as a visited set to detect back-edges. - if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) { + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { // Insertion failed, this must be the second pass. // Release all the DomainValues instead of keeping them. for (unsigned i = 0, e = NumRegs; i != e; ++i) - release(LiveRegs[i]); + release(LiveRegs[i].Value); delete[] LiveRegs; } LiveRegs = 0; @@ -380,15 +433,75 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { void ExeDepsFix::visitInstr(MachineInstr *MI) { if (MI->isDebugValue()) return; - ++Distance; - std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI); - if (domp.first) - if (domp.second) - visitSoftInstr(MI, domp.second); + + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); else - visitHardInstr(MI, domp.first); - else if (LiveRegs) - visitGenericInstr(MI); + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MCID.isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; + } + + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + } + + ++CurInstr; } // A hard instruction only works in one domain. 
All input registers will be @@ -430,7 +543,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { + if (DomainValue *dv = LiveRegs[rx].Value) { // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -459,52 +572,53 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector<DomainValue*,4> doms; + SmallVector<LiveReg, 4> Regs; for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; - DomainValue *dv = LiveRegs[rx]; + const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - kill(*i); + if (!LR.Value->getCommonDomains(available)) { + kill(rx); continue; } - // sorted, uniqued insert. - bool inserted = false; - for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); + // Sorted insertion. + bool Inserted = false; + for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); } } - if (!inserted) - doms.push_back(dv); + if (!Inserted) + Regs.push_back(LR); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; - while (!doms.empty()) { + while (!Regs.empty()) { if (!dv) { - dv = doms.pop_back_val(); + dv = Regs.pop_back_val().Value; continue; } - DomainValue *latest = doms.pop_back_val(); - if (merge(dv, latest)) continue; + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) + if (LiveRegs[*i].Value == Latest) kill(*i); } // dv is the DomainValue we are going to use for this instruction. if (!dv) dv = alloc(); - dv->Dist = Distance; dv->AvailableDomains = available; dv->Instrs.push_back(mi); @@ -514,32 +628,23 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); } } } -void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any relevant registers redefined. 
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - kill(rx); - } -} - bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); LiveRegs = 0; - Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << RC->getName() << " **********\n"); + // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; @@ -567,7 +672,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; - if (enterBasicBlock(MBB)) + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) @@ -580,6 +686,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = Loops.size(); i != e; ++i) { MachineBasicBlock *MBB = Loops[i]; enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); leaveBasicBlock(MBB); } @@ -590,8 +700,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { if (FI == LiveOuts.end() || !FI->second) continue; for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (FI->second[i]) - release(FI->second[i]); + if (FI->second[i].Value) + release(FI->second[i].Value); delete[] FI->second; } LiveOuts.clear(); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 766c6ee..9349797 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -97,6 +97,7 @@ GCStrategy::GCStrategy() : CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), + CustomSafePoints(false), InitRoots(true), UsesMetadata(false) {} @@ -116,6 +117,14 @@ bool GCStrategy::performCustomLowering(Function &F) { return 0; } + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; + llvm_unreachable(0); + return 0; +} + + GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { GCFunctionInfo *FI = new GCFunctionInfo(F, *this); Functions.push_back(FI); @@ -405,9 +414,13 @@ bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Find the size of the stack frame. FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - + // Find all safe points. - FindSafePoints(MF); + if (FI->getStrategy().customSafePoints()) { + FI->getStrategy().findCustomSafePoints(*FI, MF); + } else { + FindSafePoints(MF); + } // Find the stack offsets for all roots. 
FindStackOffsets(MF); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d1e3f1a..59907d9 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); assert(SrcLR && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = (SrcLR->end == VNI->def); @@ -665,8 +665,8 @@ void InlineSpiller::analyzeSiblingValues() { /// a spill at a better location. bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); - VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); - assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -726,7 +726,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(MII); - VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; @@ -770,9 +769,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { if (unsigned DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); - VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && "Missing defined value"); - assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); WorkList.push_back(std::make_pair(&DstLI, DstVNI)); } continue; @@ -811,7 +810,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -824,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); - VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); } while (!WorkList.empty()); @@ -833,7 +832,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. 
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { @@ -907,7 +906,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; @@ -1046,8 +1045,6 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!FoldMI) return false; LIS.ReplaceMachineInstrInMaps(MI, FoldMI); - if (!LoadMI) - VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); // TII.foldMemoryOperand may have left some implicit operands on the @@ -1080,8 +1077,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, LIS.getVNInfoAllocator()); @@ -1096,8 +1092,7 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); @@ -1146,8 +1141,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; @@ -1211,7 +1206,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // This instruction defines a dead value. We don't need to spill it, // but do create a live range for the dead value. VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); } } @@ -1254,7 +1249,6 @@ void InlineSpiller::spillAll() { MachineInstr *MI = RI.skipInstruction();) { assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. 
- VRM.RemoveMachineInstrFromMaps(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3dfe4c0..eb54baa7 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. do { if (handleDebugValue(MBBI, Idx)) { @@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(MI); - LocMap::iterator I = locInts.find(Idx.getUseIndex()); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); - assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } @@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && - VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { // FIXME: Translate SubIdx to a stackslot offset. Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); } else { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index b69945a..a85639f 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -680,15 +680,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = - LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } @@ -716,7 +715,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], continue; // DBG_VALUE instructions should have been eliminated earlier. SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? 
Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isUse()); const VNInfo *VNI = LI.getVNInfoAt(Idx); assert(VNI && "Interval not live at use."); MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index b1e202a..edcfebe 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -52,8 +52,6 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -149,103 +147,6 @@ void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } -bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - // We don't handle fancy stuff crossing basic block boundaries - if (li.ranges.size() != 1) - return true; - const LiveRange &range = li.ranges.front(); - SlotIndex idx = range.start.getBaseIndex(); - SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); - - // Skip deleted instructions - MachineInstr *firstMI = getInstructionFromIndex(idx); - while (!firstMI && idx != end) { - idx = idx.getNextIndex(); - firstMI = getInstructionFromIndex(idx); - } - if (!firstMI) - return false; - - // Find last instruction in range - SlotIndex lastIdx = end.getPrevIndex(); - MachineInstr *lastMI = getInstructionFromIndex(lastIdx); - while (!lastMI && lastIdx != idx) { - lastIdx = lastIdx.getPrevIndex(); - lastMI = getInstructionFromIndex(lastIdx); - } - if (!lastMI) - return false; - - // Range cannot cross basic block boundaries or terminators - MachineBasicBlock *MBB = firstMI->getParent(); - if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) - return true; - - MachineBasicBlock::const_iterator E = lastMI; - ++E; - for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { - const MachineInstr &MI = *I; - - // Allow copies to and from li.reg - if (MI.isCopy()) - if (MI.getOperand(0).getReg() == li.reg || - MI.getOperand(1).getReg() == li.reg) - continue; - - // Check for operands using reg - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand& mop = MI.getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; - } - } - - // No conflicts found. 
- return false; -} - -bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet<MachineInstr*,32> &JoinedCopies) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - MachineInstr *MI = getInstructionFromIndex(index); - if (!MI) - continue; // skip deleted instructions - - if (JoinedCopies.count(MI)) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || PhysReg == Reg || - TargetRegisterInfo::isVirtualRegister(PhysReg)) - continue; - if (tri_->regsOverlap(Reg, PhysReg)) - return true; - } - } - } - - return false; -} - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -271,9 +172,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, if (!MO.getSubReg() || MO.isEarlyClobber()) return false; - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; @@ -296,11 +197,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getDefIndex(); - // Earlyclobbers move back one, so that they overlap the live range - // of inputs. - if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); // Make sure the first definition is not a partial redefinition. Add an // <imp-def> of the full register. @@ -334,9 +231,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // FIXME: what about dead vars? SlotIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex(); + killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); else - killIdx = defIndex.getStoreIndex(); + killIdx = defIndex.getDeadSlot(); // If the kill happens after the definition, we have an intra-block // live range. @@ -384,7 +281,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) @@ -422,14 +319,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. 
- SlotIndex RedefIndex = MIIdx.getDefIndex(); - if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getUseIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getDefIndex(); + SlotIndex DefIndex = OldValNo->def.getRegSlot(); // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. @@ -455,7 +350,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); DEBUG({ @@ -467,9 +362,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. - SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); VNInfo *ValNo; MachineInstr *CopyMI = NULL; @@ -501,10 +396,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getDefIndex(); - // Earlyclobbers move back one. - if (MO.isEarlyClobber()) - start = MIIdx.getUseIndex(); + SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); SlotIndex end = start; // If it is not used after definition, it is considered dead at @@ -514,7 +406,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // advance below compensates. if (MO.isDead()) { DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); goto exit; } @@ -531,21 +423,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that // defines it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); } goto exit; } @@ -558,7 +450,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the // physical register has been deleted by two-address pass. 
- end = start.getStoreIndex(); + end = start.getDeadSlot(); exit: assert(start < end && "did not find end of interval?"); @@ -621,7 +513,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); SeenDefUse = true; break; } else if (mi->definesRegister(interval.reg, tri_)) { @@ -630,7 +522,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); SeenDefUse = true; break; } @@ -646,7 +538,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, if (!SeenDefUse) { if (isAlias) { DEBUG(dbgs() << " dead"); - end = MIIdx.getStoreIndex(); + end = MIIdx.getDeadSlot(); } else { DEBUG(dbgs() << " live through"); end = getMBBEndIdx(MBB); @@ -766,8 +658,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); - VNInfo *VNI = li->getVNInfoAt(Idx); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + // Note: This intentionally picks up the wrong VNI in case of an EC redef. + // See below. + VNInfo *VNI = li->getVNInfoBefore(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting <undef> flags @@ -777,11 +671,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx) { - // Special case: An early-clobber tied operand reads and writes the - // register one slot early. - Idx = Idx.getPrevSlot(); - VNI = li->getVNInfoAt(Idx); + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. The getVNInfoBefore call above would have + // picked up the value defined by UseMI. Adjust the kill slot and value. + if (SlotIndex::isSameInstr(VNI->def, Idx)) { + Idx = VNI->def; + VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } WorkList.push_back(std::make_pair(Idx, VNI)); @@ -794,14 +689,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); - - // A use tied to an early-clobber def ends at the load slot and isn't caught - // above. Catch it here instead. This probably only ever happens for inline - // assembly. - if (VNI->def.isUse()) - if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) - WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -812,11 +700,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. 
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -827,9 +715,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoAt(Stop)) + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; @@ -837,15 +725,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } @@ -859,7 +748,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getNextSlot()) + if (LII->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. @@ -924,8 +813,8 @@ void LiveIntervals::addKillFlags() { // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { - // A LOAD index indicates an MBB edge. - if (RI->end.isLoad()) + // A block index indicates an MBB edge. + if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) @@ -1011,14 +900,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI) { - bool Dummy2; - return isReMaterializable(li, ValNo, MI, 0, Dummy2); -} - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool @@ -1044,107 +925,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// FilterFoldedOps - Filter out two-address use operands. Return -/// true if it finds any issue with the operands that ought to prevent -/// folding. -static bool FilterFoldedOps(MachineInstr *MI, - SmallVector<unsigned, 2> &Ops, - unsigned &MRInfo, - SmallVector<unsigned, 2> &FoldOps) { - MRInfo = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned OpIdx = Ops[i]; - MachineOperand &MO = MI->getOperand(OpIdx); - // FIXME: fold subreg use. - if (MO.getSubReg()) - return true; - if (MO.isDef()) - MRInfo |= (unsigned)VirtRegMap::isMod; - else { - // Filter out two-address use operand(s). 
- if (MI->isRegTiedToDefOperand(OpIdx)) { - MRInfo = VirtRegMap::isModRef; - continue; - } - MRInfo |= (unsigned)VirtRegMap::isRef; - } - FoldOps.push_back(OpIdx); - } - return false; -} - - -/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from -/// slot / to reg or any rematerialized load into ith operand of specified -/// MI. If it is successul, MI is updated with the newly created MI and -/// returns true. -bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, - VirtRegMap &vrm, MachineInstr *DefMI, - SlotIndex InstrIdx, - SmallVector<unsigned, 2> &Ops, - bool isSS, int Slot, unsigned Reg) { - // If it is an implicit def instruction, just delete it. - if (MI->isImplicitDef()) { - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++numFolds; - return true; - } - - // Filter the list of operand indexes that are to be folded. Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector<unsigned, 2> FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // The only time it's safe to fold into a two address instruction is when - // it's folding reload and spill from / into a spill stack slot. - if (DefMI && (MRInfo & VirtRegMap::isMod)) - return false; - - MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot) - : tii_->foldMemoryOperand(MI, FoldOps, DefMI); - if (fmi) { - // Remember this instruction uses the spill slot. - if (isSS) vrm.addSpillSlotUse(Slot, fmi); - - // Attempt to fold the memory reference into the instruction. If - // we can do this, we don't need to insert spill code. - if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot)) - vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo); - vrm.transferSpillPts(MI, fmi); - vrm.transferRestorePts(MI, fmi); - vrm.transferEmergencySpills(MI, fmi); - ReplaceMachineInstrInMaps(MI, fmi); - MI->eraseFromParent(); - MI = fmi; - ++numFolds; - return true; - } - return false; -} - -/// canFoldMemoryOperand - Returns true if the specified load / store -/// folding is possible. -bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI, - SmallVector<unsigned, 2> &Ops, - bool ReMat) const { - // Filter the list of operand indexes that are to be folded. Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector<unsigned, 2> FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // It's only legal to remat for a use, not a def. - if (ReMat && (MRInfo & VirtRegMap::isMod)) - return false; - - return tii_->canFoldMemoryOperand(MI, FoldOps); -} - bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const { LiveInterval::Ranges::const_iterator itr = li.ranges.begin(); @@ -1164,554 +944,6 @@ bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const { return true; } -/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of -/// interval on to-be re-materialized operands of MI) with new register. -void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, - VirtRegMap &vrm) { - // There is an implicit use. That means one of the other operand is - // being remat'ed and the remat'ed instruction has li.reg as an - // use operand. Make sure we rewrite that as well. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!vrm.isReMaterialized(Reg)) - continue; - MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); - MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); - if (UseMO) - UseMO->setReg(NewVReg); - } -} - -/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions -/// for addIntervalsForSpills to rewrite uses / defs for the given live range. -bool LiveIntervals:: -rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool CanFold = false; - RestartInstruction: - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned Reg = mop.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (Reg != li.reg) - continue; - - bool TryFold = !DefIsReMat; - bool FoldSS = true; // Default behavior unless it's a remat. - int FoldSlot = Slot; - if (DefIsReMat) { - // If this is the rematerializable definition MI itself and - // all of its uses are rematerialized, simply delete it. - if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << *MI << '\n'); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - break; - } - - // If def for this use can't be rematerialized, then try folding. - // If def is rematerializable and it's a load, also try folding. - TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); - if (isLoad) { - // Try fold loads (from stack slot, constant pool, etc.) into uses. - FoldSS = isLoadSS; - FoldSlot = LdSlot; - } - } - - // Scan all of the operands of this instruction rewriting operands - // to use NewVReg instead of li.reg as appropriate. We do this for - // two reasons: - // - // 1. If the instr reads the same spilled vreg multiple times, we - // want to reuse the NewVReg. - // 2. If the instr is a two-addr instruction, we are required to - // keep the src/dst regs pinned. - // - // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. - SmallVector<unsigned, 2> Ops; - tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); - - // Create a new virtual register for the spill interval. - // Create the new register now so we can map the fold instruction - // to the new register so when it is unfolded we get the correct - // answer. - bool CreatedNewVReg = false; - if (NewVReg == 0) { - NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - CreatedNewVReg = true; - - // The new virtual register should get the same allocation hints as the - // old one. 
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg); - if (Hint.first || Hint.second) - mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); - } - - if (!TryFold) - CanFold = false; - else { - // Do not fold load / store here if we are splitting. We'll find an - // optimal point to insert a load / store later. - if (!TrySplit) { - if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, FoldSS, FoldSlot, NewVReg)) { - // Folding the load/store can completely change the instruction in - // unpredictable ways, rescan it from the beginning. - - if (FoldSS) { - // We need to give the new vreg the same stack slot as the - // spilled interval. - vrm.assignVirt2StackSlot(NewVReg, FoldSlot); - } - - HasUse = false; - HasDef = false; - CanFold = false; - if (isNotInMIMap(MI)) - break; - goto RestartInstruction; - } - } else { - // We'll try to fold it later if it's profitable. - CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); - } - } - - mop.setReg(NewVReg); - if (mop.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - - // Reuse NewVReg for other reads. - bool HasEarlyClobber = false; - for (unsigned j = 0, e = Ops.size(); j != e; ++j) { - MachineOperand &mopj = MI->getOperand(Ops[j]); - mopj.setReg(NewVReg); - if (mopj.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - if (mopj.isEarlyClobber()) - HasEarlyClobber = true; - } - - if (CreatedNewVReg) { - if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); - if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { - // Each valnum may have its own remat id. - ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); - } else { - vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]); - } - if (!CanDelete || (HasUse && HasDef)) { - // If this is a two-addr instruction then its use operands are - // rematerializable but its def is not. It should be assigned a - // stack slot. - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else { - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else if (HasUse && HasDef && - vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) { - // If this interval hasn't been assigned a stack slot (because earlier - // def is a deleted remat def), do it now. - assert(Slot != VirtRegMap::NO_STACK_SLOT); - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI. - if (DefIsReMat && ImpUse) - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - - // Create a new register interval for this spill / remat. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (CreatedNewVReg) { - NewLIs.push_back(&nI); - MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg)); - if (TrySplit) - vrm.setIsSplitFromReg(NewVReg, li.reg); - } - - if (HasUse) { - if (CreatedNewVReg) { - LiveRange LR(index.getLoadIndex(), index.getDefIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } else { - // Extend the split live interval to this def / use. - SlotIndex End = index.getDefIndex(); - LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, - nI.getValNumInfo(nI.getNumValNums()-1)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - } - if (HasDef) { - // An early clobber starts at the use slot, except for an early clobber - // tied to a use operand (yes, that is a thing). - LiveRange LR(HasEarlyClobber && !HasUse ? 
- index.getUseIndex() : index.getDefIndex(), - index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - - DEBUG({ - dbgs() << "\t\t\t\tAdded new interval: "; - nI.print(dbgs(), tri_); - dbgs() << '\n'; - }); - } - return CanFold; -} -bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, - const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const { - return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); -} - -/// RewriteInfo - Keep track of machine instrs that will be rewritten -/// during spilling. -namespace { - struct RewriteInfo { - SlotIndex Index; - MachineInstr *MI; - RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} - }; - - struct RewriteInfoCompare { - bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const { - return LHS.Index < RHS.Index; - } - }; -} - -void LiveIntervals:: -rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool AllCanFold = true; - unsigned NewVReg = 0; - SlotIndex start = I->start.getBaseIndex(); - SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - - // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according to instruction index. - std::vector<RewriteInfo> RewriteMIs; - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineInstr *MI = &*ri; - MachineOperand &O = ri.getOperand(); - ++ri; - if (MI->isDebugValue()) { - // Modify DBG_VALUE now that the value is in a spill slot. - if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - int FI = isLoadSS ? LdSlot : (int)Slot; - if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - ReplaceMachineInstrInMaps(MI, NewDV); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - continue; - } - } - - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - continue; - } - assert(!(O.isImplicit() && O.isUse()) && - "Spilling register that's used as implicit use?"); - SlotIndex index = getInstructionIndex(MI); - if (index < start || index >= end) - continue; - - if (O.isUndef()) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. 
Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - continue; - RewriteMIs.push_back(RewriteInfo(index, MI)); - } - std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); - - unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0; - // Now rewrite the defs and uses. - for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { - RewriteInfo &rwi = RewriteMIs[i]; - ++i; - SlotIndex index = rwi.Index; - MachineInstr *MI = rwi.MI; - // If MI def and/or use the same register multiple times, then there - // are multiple entries. - while (i != e && RewriteMIs[i].MI == MI) { - assert(RewriteMIs[i].Index == index); - ++i; - } - MachineBasicBlock *MBB = MI->getParent(); - - if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Prevent interval - // from being spilled. - getInterval(ImpUse).markNotSpillable(); - } - - unsigned MBBId = MBB->getNumber(); - unsigned ThisVReg = 0; - if (TrySplit) { - DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId); - if (NVI != MBBVRegsMap.end()) { - ThisVReg = NVI->second; - // One common case: - // x = use - // ... - // ... - // def = ... - // = use - // It's better to start a new interval to avoid artificially - // extend the new interval. - if (MI->readsWritesVirtualRegister(li.reg) == - std::make_pair(false,true)) { - MBBVRegsMap.erase(MBB->getNumber()); - ThisVReg = 0; - } - } - } - - bool IsNew = ThisVReg == 0; - if (IsNew) { - // This ends the previous live interval. If all of its def / use - // can be folded, give it a low spill weight. - if (NewVReg && TrySplit && AllCanFold) { - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } - AllCanFold = true; - } - NewVReg = ThisVReg; - - bool HasDef = false; - bool HasUse = false; - bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit, - index, end, MI, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg, - ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs); - if (!HasDef && !HasUse) - continue; - - AllCanFold &= CanFold; - - // Update weight of spill interval. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (!TrySplit) { - // The spill weight is now infinity as it cannot be spilled again. - nI.markNotSpillable(); - continue; - } - - // Keep track of the last def and first use in each MBB. - if (HasDef) { - if (MI != ReMatOrigDefMI || !CanDelete) { - bool HasKill = false; - if (!HasUse) - HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex()); - else { - // If this is a two-address code, then this index starts a new VNInfo. - const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex()); - if (VNI) - HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex()); - } - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (!HasKill) { - if (SII == SpillIdxes.end()) { - std::vector<SRInfo> S; - S.push_back(SRInfo(index, NewVReg, true)); - SpillIdxes.insert(std::make_pair(MBBId, S)); - } else if (SII->second.back().vreg != NewVReg) { - SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if (index > SII->second.back().index) { - // If there is an earlier def and this is a two-address - // instruction, then it's not possible to fold the store (which - // would also fold the load). 
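// The SpillIdxes/RestoreIdxes bookkeeping above boils down to: within one
// basic block a spilled vreg wants at most one reload before its first use
// and one store after its last def, and that store is only foldable when no
// use follows the def.  A much-simplified standalone sketch (the Event type
// and sample indices are invented; kill tracking and the folding machinery
// are omitted):
#include <cstdio>
#include <vector>

struct Event { unsigned Index; bool IsDef; bool IsUse; };

int main() {
  // use @12, def @16, use @20 of the spilled vreg inside one block.
  std::vector<Event> Events = {{12, false, true},
                               {16, true, false},
                               {20, false, true}};
  int RestoreAt = -1, SpillAt = -1;
  bool CanFoldSpill = true;
  for (const Event &E : Events) {
    if (E.IsUse && RestoreAt < 0)
      RestoreAt = E.Index;           // reload before the first use
    if (E.IsDef) {
      SpillAt = E.Index;             // store after the last def
      CanFoldSpill = !E.IsUse;       // a tied def+use can't fold the store
    } else if (E.IsUse && SpillAt >= 0) {
      CanFoldSpill = false;          // a use after the def blocks folding
    }
  }
  std::printf("restore before %d, spill after %d, store foldable: %d\n",
              RestoreAt, SpillAt, (int)CanFoldSpill);
}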
- SRInfo &Info = SII->second.back(); - Info.index = index; - Info.canFold = !HasUse; - } - SpillMBBs.set(MBBId); - } else if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) { - // There is an earlier def that's not killed (must be two-address). - // The spill is no longer needed. - SII->second.pop_back(); - if (SII->second.empty()) { - SpillIdxes.erase(MBBId); - SpillMBBs.reset(MBBId); - } - } - } - } - - if (HasUse) { - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) - // Use(s) following the last def, it's not safe to fold the spill. - SII->second.back().canFold = false; - DenseMap<unsigned, std::vector<SRInfo> >::iterator RII = - RestoreIdxes.find(MBBId); - if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg) - // If we are splitting live intervals, only fold if it's the first - // use and there isn't another use later in the MBB. - RII->second.back().canFold = false; - else if (IsNew) { - // Only need a reload if there isn't an earlier def / use. - if (RII == RestoreIdxes.end()) { - std::vector<SRInfo> Infos; - Infos.push_back(SRInfo(index, NewVReg, true)); - RestoreIdxes.insert(std::make_pair(MBBId, Infos)); - } else { - RII->second.push_back(SRInfo(index, NewVReg, true)); - } - RestoreMBBs.set(MBBId); - } - } - - // Update spill weight. - unsigned loopDepth = loopInfo->getLoopDepth(MBB); - nI.weight += getSpillWeight(HasDef, HasUse, loopDepth); - } - - if (NewVReg && TrySplit && AllCanFold) { - // If all of its def / use can be folded, give it a low spill weight. - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } -} - -bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return false; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && - Restores[i].vreg == vr && - Restores[i].canFold) - return true; - return false; -} - -void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = SlotIndex(); -} - -/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being -/// spilled and create empty intervals for their uses. -void -LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector<LiveInterval*> &NewLIs) { - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineOperand &O = ri.getOperand(); - MachineInstr *MI = &*ri; - ++ri; - if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - continue; - } - if (O.isDef()) { - assert(MI->isImplicitDef() && - "Register def was not rewritten?"); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } else { - // This must be an use of an implicit_def so it's not part of the live - // interval. 
Create a new empty live interval for it. - // FIXME: Can we simply erase some of the instructions? e.g. Stores? - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.setIsImplicitlyDefined(NewVReg); - NewLIs.push_back(&getOrCreateInterval(NewVReg)); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) { - MO.setReg(NewVReg); - MO.setIsUndef(); - } - } - } - } -} - float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -1730,452 +962,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { return (isDef + isUse) * lc; } -static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - NewLIs[i]->weight = - normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); -} - -std::vector<LiveInterval*> LiveIntervals:: -addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(dbgs(), tri_); - dbgs() << '\n'; - }); - - // Each bit specify whether a spill is required in the MBB. - BitVector SpillMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes; - BitVector RestoreMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes; - DenseMap<unsigned,unsigned> MBBVRegsMap; - std::vector<LiveInterval*> NewLIs; - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - unsigned NumValNums = li.getNumValNums(); - SmallVector<MachineInstr*, 4> ReMatDefs; - ReMatDefs.resize(NumValNums, NULL); - SmallVector<MachineInstr*, 4> ReMatOrigDefs; - ReMatOrigDefs.resize(NumValNums, NULL); - SmallVector<int, 4> ReMatIds; - ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT); - BitVector ReMatDelete(NumValNums); - unsigned Slot = VirtRegMap::MAX_STACK_SLOT; - - // Spilling a split live interval. It cannot be split any further. Also, - // it's also guaranteed to be a single val# / range interval. - if (vrm.getPreSplitReg(li.reg)) { - vrm.setIsSplitFromReg(li.reg, 0); - // Unset the split kill marker on the last use. - SlotIndex KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx != SlotIndex()) { - MachineInstr *KillMI = getInstructionFromIndex(KillIdx); - assert(KillMI && "Last use disappeared?"); - int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); - assert(KillOp != -1 && "Last use disappeared?"); - KillMI->getOperand(KillOp).setIsKill(false); - } - vrm.removeKillPoint(li.reg); - bool DefIsReMat = vrm.isReMaterialized(li.reg); - Slot = vrm.getStackSlot(li.reg); - assert(Slot != VirtRegMap::MAX_STACK_SLOT); - MachineInstr *ReMatDefMI = DefIsReMat ? - vrm.getReMaterializedMI(li.reg) : NULL; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad())); - bool IsFirstRange = true; - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - // If this is a split live interval with multiple ranges, it means there - // are two-address instructions that re-defined the value. Only the - // first def can be rematerialized! 
- if (IsFirstRange) { - // Note ReMatOrigDefMI has already been deleted. - rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } else { - rewriteInstructionsForSpills(li, false, I, NULL, 0, - Slot, 0, false, false, false, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - IsFirstRange = false; - } - - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - bool TrySplit = !intervalIsInOneMBB(li); - if (TrySplit) - ++numSplits; - bool NeedStackSlot = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - bool dummy; - if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { - // Remember how to remat the def of this val#. - ReMatOrigDefs[VN] = ReMatDefMI; - // Original def may be modified so we have to make a copy here. - MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - CloneMIs.push_back(Clone); - ReMatDefs[VN] = Clone; - - bool CanDelete = true; - if (VNI->hasPHIKill()) { - // A kill is a phi node, not all of its uses can be rematerialized. - // It must not be deleted. - CanDelete = false; - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - if (CanDelete) - ReMatDelete.set(VN); - } else { - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - } - - // One stack slot per live interval. - if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { - if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) - Slot = vrm.assignVirt2StackSlot(li.reg); - - // This case only occurs when the prealloc splitter has already assigned - // a stack slot to this vreg. - else - Slot = vrm.getStackSlot(li.reg); - } - - // Create new intervals and rewrite defs and uses. - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; - MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; - bool DefIsReMat = ReMatDefMI != NULL; - bool CanDelete = ReMatDelete[I->valno->id]; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); - rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - - // Insert spills / restores if we are splitting. 
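// The value-number classification above decides, per val#, whether its def
// can simply be recomputed at each use (rematerialized) or whether the
// interval must go through memory; one stack slot is shared by the whole
// interval.  A minimal standalone sketch, assuming an invented
// isTriviallyRematable predicate in place of the real isReMaterializable()
// query:
#include <cstdio>
#include <vector>

struct ValDef {
  const char *Name;
  bool ReadsMemoryOrRegs;   // e.g. a load, or an instruction with live inputs
  bool HasPHIKill;          // value flows into a PHI; its def can't be deleted
};

static bool isTriviallyRematable(const ValDef &V) {
  return !V.ReadsMemoryOrRegs;      // e.g. "vreg = MOV 42"
}

int main() {
  std::vector<ValDef> Vals = {{"v0 = MOV 42", false, false},
                              {"v1 = LOAD [sp+8]", true, false},
                              {"v2 = MOV 7", false, true}};
  bool NeedStackSlot = false;
  for (const ValDef &V : Vals) {
    bool Remat = isTriviallyRematable(V);
    // A value feeding a PHI may still be rematted at its uses, but its def
    // cannot simply be deleted, so some range may still need memory.
    if (!Remat || V.HasPHIKill)
      NeedStackSlot = true;
    std::printf("%-18s remat=%d\n", V.Name, (int)Remat);
  }
  std::printf("interval needs a stack slot: %d (one slot shared by all values)\n",
              (int)NeedStackSlot);
}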
- if (!TrySplit) { - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - SmallPtrSet<LiveInterval*, 4> AddedKill; - SmallVector<unsigned, 2> Ops; - if (NeedStackSlot) { - int Id = SpillMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &spills = SpillIdxes[Id]; - for (unsigned i = 0, e = spills.size(); i != e; ++i) { - SlotIndex index = spills[i].index; - unsigned VReg = spills[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - bool FoundUse = false; - Ops.clear(); - if (spills[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - Ops.push_back(j); - if (MO.isDef()) - continue; - if (isReMat || - (!FoundUse && !alsoFoldARestore(Id, index, VReg, - RestoreMBBs, RestoreIdxes))) { - // MI has two-address uses of the same register. If the use - // isn't the first and only use in the BB, then we can't fold - // it. FIXME: Move this to rewriteInstructionsForSpills. - CanFold = false; - break; - } - FoundUse = true; - } - } - // Fold the store into the def if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ - Folded = true; - if (FoundUse) { - // Also folded uses, do not issue a load. - eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - } - nI.removeRange(index.getDefIndex(), index.getStoreIndex()); - } - } - - // Otherwise tell the spiller to issue a spill. - if (!Folded) { - LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; - bool isKill = LR->end == index.getStoreIndex(); - if (!MI->registerDefIsDead(nI.reg)) - // No need to spill a dead def. - vrm.addSpillPoint(VReg, isKill, MI); - if (isKill) - AddedKill.insert(&nI); - } - } - Id = SpillMBBs.find_next(Id); - } - } - - int Id = RestoreMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = restores.size(); i != e; ++i) { - SlotIndex index = restores[i].index; - if (index == SlotIndex()) - continue; - unsigned VReg = restores[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - Ops.clear(); - if (restores[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - if (MO.isDef()) { - // If this restore were to be folded, it would have been folded - // already. - CanFold = false; - break; - } - Ops.push_back(j); - } - } - - // Fold the load into the use if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (!isReMat) - Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); - else { - MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); - int LdSlot = 0; - bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - // If the rematerializable def is a load, also try to fold it. 
- if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) - Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, isLoadSS, LdSlot, VReg); - if (!Folded) { - unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); - if (ImpUse) { - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and mark the register - // interval as unspillable. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.markNotSpillable(); - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - } - } - } - } - // If folding is not possible / failed, then tell the spiller to issue a - // load / rematerialization for us. - if (Folded) - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - else - vrm.addRestorePoint(VReg, MI); - } - Id = RestoreMBBs.find_next(Id); - } - - // Finalize intervals: add kills, finalize spill weights, and filter out - // dead intervals. - std::vector<LiveInterval*> RetNewLIs; - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { - LiveInterval *LI = NewLIs[i]; - if (!LI->empty()) { - if (!AddedKill.count(LI)) { - LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - SlotIndex LastUseIdx = LR->end.getBaseIndex(); - MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); - int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); - assert(UseIdx != -1); - if (!LastUse->isRegTiedToDefOperand(UseIdx)) { - LastUse->getOperand(UseIdx).setIsKill(); - vrm.addKillPoint(LI->reg, LastUseIdx); - } - } - RetNewLIs.push_back(LI); - } - } - - handleSpilledImpDefs(li, vrm, rc, RetNewLIs); - normalizeSpillWeights(RetNewLIs); - return RetNewLIs; -} - -/// hasAllocatableSuperReg - Return true if the specified physical register has -/// any super register that's allocatable. -bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) - if (allocatableRegs_[*AS] && hasInterval(*AS)) - return true; - return false; -} - -/// getRepresentativeReg - Find the largest super register of the specified -/// physical register. -unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. - unsigned BestReg = Reg; - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { - unsigned SuperReg = *AS; - if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) { - BestReg = SuperReg; - break; - } - } - return BestReg; -} - -/// getNumConflictsWithPhysReg - Return the number of uses and defs of the -/// specified interval that conflicts with the specified physical register. -unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const { - unsigned NumConflicts = 0; - const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg)); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue()) - continue; - SlotIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) - ++NumConflicts; - } - return NumConflicts; -} - -/// spillPhysRegAroundRegDefsUses - Spill the specified physical register -/// around all defs and uses of the specified interval. Return true if it -/// was able to cut its interval. 
-bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm) { - unsigned SpillReg = getRepresentativeReg(PhysReg); - - DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) - << " represented by " << tri_->getName(SpillReg) << '\n'); - - for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) - // If there are registers which alias PhysReg, but which are not a - // sub-register of the chosen representative super register. Assert - // since we can't handle it yet. - assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) || - tri_->isSuperRegister(*AS, SpillReg)); - - bool Cut = false; - SmallVector<unsigned, 4> PRegs; - if (hasInterval(SpillReg)) - PRegs.push_back(SpillReg); - for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) - if (hasInterval(*SR)) - PRegs.push_back(*SR); - - DEBUG({ - dbgs() << "Trying to spill:"; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) - dbgs() << ' ' << tri_->getName(PRegs[i]); - dbgs() << '\n'; - }); - - SmallPtrSet<MachineInstr*, 8> SeenMIs; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue() || SeenMIs.count(MI)) - continue; - SeenMIs.insert(MI); - SlotIndex Index = getInstructionIndex(MI); - bool LiveReg = false; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { - unsigned PReg = PRegs[i]; - LiveInterval &pli = getInterval(PReg); - if (!pli.liveAt(Index)) - continue; - LiveReg = true; - SlotIndex StartIdx = Index.getLoadIndex(); - SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, tm_); - } - report_fatal_error(Msg.str()); - } - pli.removeRange(StartIdx, EndIdx); - LiveReg = true; - } - if (!LiveReg) - continue; - DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); - vrm.addEmergencySpill(SpillReg, MI); - Cut = true; - } - return Cut; -} - LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), + SlotIndex(getInstructionIndex(startInst).getRegSlot()), startInst, getVNInfoAllocator()); VN->setHasPHIKill(true); LiveRange LR( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), + SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); Interval.addRange(LR); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b23f851..2f283b2 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -83,8 +83,8 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx, LiveIntervals &lis) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || MO.isDef()) @@ -151,7 +151,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock 
&MBB, tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getDefIndex(); + .getRegSlot(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { @@ -221,7 +221,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, while (!Dead.empty()) { MachineInstr *MI = Dead.pop_back_val(); assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); // Never delete inline asm. if (MI->isInlineAsm()) { diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 53a8779..304f167 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -115,7 +115,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -138,7 +138,6 @@ public: void merge(MachineBasicBlock *BB, BlockChain *Chain) { assert(BB); assert(!Blocks.empty()); - assert(Blocks.back()->isSuccessor(BB)); // Fast path in case we don't have a chain already. if (!Chain) { @@ -160,6 +159,12 @@ public: BlockToChain[*BI] = this; } } + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; }; } @@ -199,12 +204,25 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; - BlockChain *CreateChain(MachineBasicBlock *BB); - void mergeSuccessor(MachineBasicBlock *BB, BlockChain *Chain, - BlockFilterSet *Filter = 0); + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain, + ArrayRef<MachineBasicBlock *> Blocks, + unsigned &PrevUnplacedBlockIdx); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + ArrayRef<MachineBasicBlock *> Blocks, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); void buildLoopChains(MachineFunction &F, MachineLoop &L); void buildCFGChains(MachineFunction &F); - void placeChainsTopologically(MachineFunction &F); void AlignLoops(MachineFunction &F); public: @@ -264,96 +282,256 @@ static std::string getBlockNum(MachineBasicBlock *BB) { } #endif -/// \brief Helper to create a new chain for a single BB. +/// \brief Mark a chain's successors as having one fewer preds. /// -/// Takes care of growing the Chains, setting up the BlockChain object, and any -/// debug checking logic. -/// \returns A pointer to the new BlockChain. 
-BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) { - BlockChain *Chain = - new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); - return Chain; +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. + for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SI); + } + } } -/// \brief Merge a chain with any viable successor. +/// \brief Select the best successor for a block. /// -/// This routine walks the predecessors of the current block, looking for -/// viable merge candidates. It has strict rules it uses to determine when -/// a predecessor can be merged with the current block, which center around -/// preserving the CFG structure. It performs the merge if any viable candidate -/// is found. -void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, - BlockChain *Chain, - BlockFilterSet *Filter) { - assert(BB); - assert(Chain); - - // If this block is not at the end of its chain, it cannot merge with any - // other chain. - if (Chain && *llvm::prior(Chain->end()) != BB) - return; - - // Walk through the successors looking for the highest probability edge. - MachineBasicBlock *Successor = 0; - BranchProbability BestProb = BranchProbability::getZero(); +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. 
This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to effeciently support query patterns such as + // this. + uint32_t BestWeight = 0; + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - if (BB == *SI || (Filter && !Filter->count(*SI))) + if (BlockFilter && !BlockFilter->count(*SI)) continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } - BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb << "\n"); - if (!Successor || SuccProb > BestProb || (!(SuccProb < BestProb) && - BB->isLayoutSuccessor(*SI))) { - Successor = *SI; - BestProb = SuccProb; + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; } + + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestWeight >= SuccWeight) + continue; + BestSucc = *SI; + BestWeight = SuccWeight; } - if (!Successor) - return; + return BestSucc; +} - // Grab a chain if it exists already for this successor and make sure the - // successor is at the start of the chain as we can't merge mid-chain. Also, - // if the successor chain is the same as our chain, we're already merged. - BlockChain *SuccChain = BlockToChain[Successor]; - if (SuccChain && (SuccChain == Chain || Successor != *SuccChain->begin())) - return; +namespace { +/// \brief Predicate struct to detect blocks already placed. +class IsBlockPlaced { + const BlockChain &PlacedChain; + const BlockToChainMapType &BlockToChain; - // We only merge chains across a CFG merge when the desired merge path is - // significantly hotter than the incoming edge. We define a hot edge more - // strictly than the BranchProbabilityInfo does, as the two predecessor - // blocks may have dramatically different incoming probabilities we need to - // account for. Therefor we use the "global" edge weight which is the - // branch's probability times the block frequency of the predecessor. - BlockFrequency MergeWeight = MBFI->getBlockFreq(BB); - MergeWeight *= MBPI->getEdgeProbability(BB, Successor); - // We only want to consider breaking the CFG when the merge weight is much - // higher (80% vs. 20%), so multiply it by 1/4. This will require the merged - // edge to be 4x more likely before we disrupt the CFG. This number matches - // the definition of "hot" in BranchProbabilityAnalysis (80% vs. 20%). 
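// selectBestSuccessor above implements: take the highest-weight out-edge,
// but only jump ahead of a block whose chain still has unplaced in-loop
// predecessors when the edge carries at least HotProb (80%) of this block's
// outgoing weight.  A minimal standalone sketch of that rule; Succ and
// pickBestSuccessor are illustrative names, not the pass's API.
#include <cstdio>
#include <vector>

struct Succ {
  const char *Name;
  unsigned Weight;              // raw edge weight out of the current block
  unsigned UnplacedPreds;       // LoopPredecessors of the successor's chain
};

static const Succ *pickBestSuccessor(const std::vector<Succ> &Succs) {
  unsigned Sum = 0;
  for (const Succ &S : Succs) Sum += S.Weight;
  const Succ *Best = nullptr;
  for (const Succ &S : Succs) {
    // Placing S now would break the CFG shape unless this edge carries at
    // least 80% of the outgoing weight (the HotProb threshold in the pass).
    bool Hot = 5 * S.Weight >= 4 * Sum;
    if (S.UnplacedPreds != 0 && !Hot)
      continue;
    if (!Best || S.Weight > Best->Weight)
      Best = &S;
  }
  return Best;
}

int main() {
  std::vector<Succ> Succs = {{"bb.if.then", 90, 1},   // hot, CFG-breaking
                             {"bb.if.else", 10, 0}};  // cold, CFG-neutral
  if (const Succ *B = pickBestSuccessor(Succs))
    std::printf("layout successor: %s\n", B->Name);   // bb.if.then (90%)
}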
- MergeWeight *= BranchProbability(1, 4); - for (MachineBasicBlock::pred_iterator PI = Successor->pred_begin(), - PE = Successor->pred_end(); - PI != PE; ++PI) { - if (BB == *PI || Successor == *PI) continue; - BlockFrequency PredWeight = MBFI->getBlockFreq(*PI); - PredWeight *= MBPI->getEdgeProbability(*PI, Successor); - - // Return on the first predecessor we find which outstrips our merge weight. - if (MergeWeight < PredWeight) - return; - DEBUG(dbgs() << "Breaking CFG edge!\n" - << " Edge from " << getBlockNum(BB) << " to " - << getBlockNum(Successor) << ": " << MergeWeight << "\n" - << " vs. " << getBlockNum(BB) << " to " - << getBlockNum(*PI) << ": " << PredWeight << "\n"); +public: + IsBlockPlaced(const BlockChain &PlacedChain, + const BlockToChainMapType &BlockToChain) + : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} + + bool operator()(MachineBasicBlock *BB) const { + return BlockToChain.lookup(BB) == &PlacedChain; + } +}; +} + +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + IsBlockPlaced(Chain, BlockToChain)), + WorkList.end()); + + MachineBasicBlock *BestBlock = 0; + BlockFrequency BestFreq; + for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), + WBE = WorkList.end(); + WBI != WBE; ++WBI) { + assert(!BlockFilter || BlockFilter->count(*WBI)); + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = *WBI; + BestFreq = CandidateFreq; + } + return BestBlock; +} + +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the sequence of blocks, starting from the +/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire +/// sequence on repeated calls to this routine. 
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + const BlockChain &PlacedChain, + ArrayRef<MachineBasicBlock *> Blocks, + unsigned &PrevUnplacedBlockIdx) { + for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (BlockToChain[BB] != &PlacedChain) { + PrevUnplacedBlockIdx = i; + return BB; + } } + return 0; +} + +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, + BlockChain &Chain, + ArrayRef<MachineBasicBlock *> Blocks, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*Chain.begin() == BB); + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + unsigned PrevUnplacedBlockIdx = 0; + + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *llvm::prior(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*llvm::prior(Chain.end()) == BB); + MachineBasicBlock *BestSucc = 0; + + // Check for unreasonable branches, and forcibly merge the existing layout + // successor for them. We can handle cases that AnalyzeBranch can't: jump + // tables etc are fine. The case we want to handle specially is when there + // is potential fallthrough, but the branch cannot be analyzed. This + // includes blocks without terminators as well as other cases. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) { + MachineFunction::iterator I(BB), NextI(llvm::next(I)); + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. Note that this may not be a valid block + // in the loop, but we allow that to cope with degenerate situations. + assert(NextI != BB->getParent()->end()); + BestSucc = NextI; + } + + // Otherwise, look for the best viable successor if there is one to place + // immediately after this block. + if (!BestSucc) + BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. This won't be a fallthrough, but it will increase locality. + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + + if (!BestSucc) { + BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); + } - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " - << getBlockNum(Successor) << "\n"); - Chain->merge(Successor, SuccChain); + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. 
+ SuccChain.LoopPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) + << " to " << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *llvm::prior(Chain.end()); + }; + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); } /// \brief Forms basic block chains from the natural loop structures. @@ -362,86 +540,192 @@ void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, /// as much as possible. We can then stitch the chains together in a way which /// both preserves the topological structure and minimizes taken conditional /// branches. -void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) buildLoopChains(F, **LI); - SmallPtrSet<MachineBasicBlock *, 16> LoopBlockSet(L.block_begin(), - L.block_end()); + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + BlockChain &LoopChain = *BlockToChain[L.getHeader()]; - // Begin building up a set of chains of blocks within this loop which should - // remain contiguous. Some of the blocks already belong to a chain which - // represents an inner loop. - for (MachineLoop::block_iterator BI = L.block_begin(), BE = L.block_end(); + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); BI != BE; ++BI) { - MachineBasicBlock *BB = *BI; - BlockChain *Chain = BlockToChain[BB]; - if (!Chain) Chain = CreateChain(BB); - mergeSuccessor(BB, Chain, &LoopBlockSet); + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain) || BI == L.block_begin()) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*BI); } + + buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList, + &LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) + if (!LoopBlockSet.erase(*BCI)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block. + // FIXME: Such constructs shouldn't exist. Track them down and fix them. 
+ dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); } void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { - // First build any loop-based chains. + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + BlockToChain[&*FI] = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI); + + // Build any loop-based chains. for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; ++LI) buildLoopChains(F, **LI); - // Now walk the blocks of the function forming chains where they don't - // violate any CFG structure. - for (MachineFunction::iterator BI = F.begin(), BE = F.end(); - BI != BE; ++BI) { - MachineBasicBlock *BB = BI; - BlockChain *Chain = BlockToChain[BB]; - if (!Chain) Chain = CreateChain(BB); - mergeSuccessor(BB, Chain); - } -} + // We need a vector of blocks so that buildChain can handle unnatural CFG + // constructs by searching for unplaced blocks and just concatenating them. + SmallVector<MachineBasicBlock *, 16> Blocks; + Blocks.reserve(F.size()); -void MachineBlockPlacement::placeChainsTopologically(MachineFunction &F) { - MachineBasicBlock *EntryB = &F.front(); - assert(BlockToChain[EntryB] && "Missing chain for entry block"); - assert(*BlockToChain[EntryB]->begin() == EntryB && - "Entry block is not the head of the entry block chain"); + SmallVector<MachineBasicBlock *, 16> BlockWorkList; - // Walk the blocks in RPO, and insert each block for a chain in order the - // first time we see that chain. - MachineFunction::iterator InsertPos = F.begin(); - SmallPtrSet<BlockChain *, 16> VisitedChains; - ReversePostOrderTraversal<MachineBasicBlock *> RPOT(EntryB); - typedef ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator - rpo_iterator; - for (rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BlockChain *Chain = BlockToChain[*I]; - assert(Chain); - if(!VisitedChains.insert(Chain)) + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + Blocks.push_back(BB); + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) continue; - for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); BI != BE; - ++BI) { - DEBUG(dbgs() << (BI == Chain->begin() ? "Placing chain " - : " ... 
") - << getBlockName(*BI) << "\n"); - if (InsertPos != MachineFunction::iterator(*BI)) - F.splice(InsertPos, *BI); - else - ++InsertPos; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(BB); } - // Now that every block is in its final position, update all of the - // terminators. + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList); + + typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. + if (BI == FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*FI, TBB, FBB, Cond)) - FI->updateTerminator(); + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); } /// \brief Recursive helper to align a loop and any nested loops. @@ -479,10 +763,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - placeChainsTopologically(F); AlignLoops(F); BlockToChain.clear(); + ChainAllocator.DestroyAll(); // We always return true as we have no way to track whether the final order // differs from the original order. 
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 9874037..e3cfa9e 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -27,19 +27,34 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", char MachineBranchProbabilityInfo::ID = 0; uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; - +getSumForBlock(MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - + uint32_t Weight = getEdgeWeight(MBB, *I); Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; } + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, *I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); return Sum; } @@ -61,26 +76,18 @@ bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; uint32_t MaxWeight = 0; MachineBasicBlock *MaxSucc = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - + uint32_t Weight = getEdgeWeight(MBB, *I); if (Weight > MaxWeight) { MaxWeight = Weight; - MaxSucc = Succ; + MaxSucc = *I; } } - if (BranchProbability(MaxWeight, Sum) >= BranchProbability(4, 5)) + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; return 0; @@ -89,8 +96,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; return BranchProbability(N, D); } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 26847d3..b3c28b0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -659,7 +659,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check LiveInts liveness and kill. 
if (TargetRegisterInfo::isVirtualRegister(Reg) && LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (!LI.liveAt(UseIdx)) { @@ -668,7 +668,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check for extra kill flags. // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { report("Live range continues after kill flag", MO, MONum); *OS << "Live range: " << LI << '\n'; } @@ -710,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check LiveInts for a live range, but only for virtual registers. if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { @@ -800,11 +800,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MCID.mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MCID.mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } @@ -1085,13 +1085,14 @@ void MachineVerifier::verifyLiveIntervals() { // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. if (isEarlyClobber) { - if (!VNI->def.isUse()) { - report("Early clobber def must be at a USE slot", MF); + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } - } else if (!VNI->def.isDef()) { - report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } @@ -1192,8 +1193,8 @@ void MachineVerifier::verifyLiveIntervals() { // Check that VNI is live-out of all predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); - const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; @@ -1201,7 +1202,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << " in " << LI << '\n'; continue; } diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 315aedd..5f57088 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,11 +55,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } - // This forces linking of the linear scan register allocator, - // so -regalloc=linearscan still works in clang. - if (Ctor == createLinearScanRegisterAllocator) - return createLinearScanRegisterAllocator(); - if (Ctor != createDefaultRegisterAllocator) return Ctor(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 71b7f4f..366c94e 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -248,7 +248,6 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -350,11 +349,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp deleted file mode 100644 index ce3fb90..0000000 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ /dev/null @@ -1,1543 +0,0 @@ -//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a linear scan register allocator. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" -#include "VirtRegMap.h" -#include "VirtRegRewriter.h" -#include "RegisterClassInfo.h" -#include "Spiller.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <queue> -#include <memory> -#include <cmath> - -using namespace llvm; - -STATISTIC(NumIters , "Number of iterations performed"); -STATISTIC(NumBacktracks, "Number of times we had to backtrack"); -STATISTIC(NumCoalesce, "Number of copies coalesced"); -STATISTIC(NumDowngrade, "Number of registers downgraded"); - -static cl::opt<bool> -NewHeuristic("new-spilling-heuristic", - cl::desc("Use new spilling heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -TrivCoalesceEnds("trivial-coalesce-ends", - cl::desc("Attempt trivial coalescing of interval ends"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -AvoidWAWHazard("avoid-waw-hazard", - cl::desc("Avoid write-write hazards for some register classes"), - cl::init(false), cl::Hidden); - -static RegisterRegAlloc -linearscanRegAlloc("linearscan", "linear scan register allocator", - createLinearScanRegisterAllocator); - -namespace { - // When we allocate a register, add it to a fixed-size queue of - // registers to skip in subsequent allocations. This trades a small - // amount of register pressure and increased spills for flexibility in - // the post-pass scheduler. - // - // Note that in a the number of registers used for reloading spills - // will be one greater than the value of this option. - // - // One big limitation of this is that it doesn't differentiate between - // different register classes. So on x86-64, if there is xmm register - // pressure, it can caused fewer GPRs to be held in the queue. 
- static cl::opt<unsigned> - NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember" - "to skip."), - cl::init(0), - cl::Hidden); - - struct RALinScan : public MachineFunctionPass { - static char ID; - RALinScan() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass( - *PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - - // Initialize the queue to record recently-used registers. - if (NumRecentlyUsedRegs > 0) - RecentRegs.resize(NumRecentlyUsedRegs, 0); - RecentNext = RecentRegs.begin(); - avoidWAW_ = 0; - } - - typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; - typedef SmallVector<IntervalPtr, 32> IntervalPtrs; - private: - /// RelatedRegClasses - This structure is built the first time a function is - /// compiled, and keeps track of which register classes have registers that - /// belong to multiple classes or have aliases that are in other classes. - EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses; - DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg; - - // NextReloadMap - For each register in the map, it maps to the another - // register which is defined by a reload from the same stack slot and - // both reloads are in the same basic block. - DenseMap<unsigned, unsigned> NextReloadMap; - - // DowngradedRegs - A set of registers which are being "downgraded", i.e. - // un-favored for allocation. - SmallSet<unsigned, 8> DowngradedRegs; - - // DowngradeMap - A map from virtual registers to physical registers being - // downgraded for the virtual registers. - DenseMap<unsigned, unsigned> DowngradeMap; - - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - BitVector allocatableRegs_; - BitVector reservedRegs_; - LiveIntervals* li_; - MachineLoopInfo *loopInfo; - RegisterClassInfo RegClassInfo; - - /// handled_ - Intervals are added to the handled_ set in the order of their - /// start value. This is uses for backtracking. - std::vector<LiveInterval*> handled_; - - /// fixed_ - Intervals that correspond to machine registers. - /// - IntervalPtrs fixed_; - - /// active_ - Intervals that are currently being processed, and which have a - /// live range active for the current point. - IntervalPtrs active_; - - /// inactive_ - Intervals that are currently being processed, but which have - /// a hold at the current point. - IntervalPtrs inactive_; - - typedef std::priority_queue<LiveInterval*, - SmallVector<LiveInterval*, 64>, - greater_ptr<LiveInterval> > IntervalHeap; - IntervalHeap unhandled_; - - /// regUse_ - Tracks register usage. - SmallVector<unsigned, 32> regUse_; - SmallVector<unsigned, 32> regUseBackUp_; - - /// vrm_ - Tracks register assignments. 
- VirtRegMap* vrm_; - - std::auto_ptr<VirtRegRewriter> rewriter_; - - std::auto_ptr<Spiller> spiller_; - - // The queue of recently-used registers. - SmallVector<unsigned, 4> RecentRegs; - SmallVector<unsigned, 4>::iterator RecentNext; - - // Last write-after-write register written. - unsigned avoidWAW_; - - // Record that we just picked this register. - void recordRecentlyUsed(unsigned reg) { - assert(reg != 0 && "Recently used register is NOREG!"); - if (!RecentRegs.empty()) { - *RecentNext++ = reg; - if (RecentNext == RecentRegs.end()) - RecentNext = RecentRegs.begin(); - } - } - - public: - virtual const char* getPassName() const { - return "Linear Scan Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<SlotIndexes>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - // Make sure PassManager knows which analyses to make available - // to coalescing and which analyses coalescing invalidates. - AU.addRequiredTransitiveID(RegisterCoalescerPassID); - AU.addRequired<CalculateSpillWeights>(); - AU.addRequiredID(LiveStacksID); - AU.addPreservedID(LiveStacksID); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<VirtRegMap>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addRequiredID(MachineDominatorsID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - register allocate the whole function - bool runOnMachineFunction(MachineFunction&); - - // Determine if we skip this register due to its being recently used. - bool isRecentlyUsed(unsigned reg) const { - return reg == avoidWAW_ || - std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); - } - - private: - /// linearScan - the linear scan algorithm - void linearScan(); - - /// initIntervalSets - initialize the interval sets. - /// - void initIntervalSets(); - - /// processActiveIntervals - expire old intervals and move non-overlapping - /// ones to the inactive list. - void processActiveIntervals(SlotIndex CurPoint); - - /// processInactiveIntervals - expire old intervals and move overlapping - /// ones to the active list. - void processInactiveIntervals(SlotIndex CurPoint); - - /// hasNextReloadInterval - Return the next liveinterval that's being - /// defined by a reload from the same SS as the specified one. - LiveInterval *hasNextReloadInterval(LiveInterval *cur); - - /// DowngradeRegister - Downgrade a register for allocation. - void DowngradeRegister(LiveInterval *li, unsigned Reg); - - /// UpgradeRegister - Upgrade a register for allocation. - void UpgradeRegister(unsigned Reg); - - /// assignRegOrStackSlotAtInterval - assign a register if one - /// is available, or spill. - void assignRegOrStackSlotAtInterval(LiveInterval* cur); - - void updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC); - - /// findIntervalsToSpill - Determine the intervals to spill for the - /// specified interval. It's passed the physical registers whose spill - /// weight is the lowest among all the registers whose live intervals - /// conflict with the interval. 
- void findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals); - - /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try to allocate the definition to the same register as the source, - /// if the register is not defined during the life time of the interval. - /// This eliminates a copy, and is used to coalesce copies which were not - /// coalesced away before allocation either due to dest and src being in - /// different register classes or because the coalescer was overly - /// conservative. - unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); - - /// - /// Register usage / availability tracking helpers. - /// - - void initRegUses() { - regUse_.resize(tri_->getNumRegs(), 0); - regUseBackUp_.resize(tri_->getNumRegs(), 0); - } - - void finalizeRegUses() { -#ifndef NDEBUG - // Verify all the registers are "freed". - bool Error = false; - for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { - if (regUse_[i] != 0) { - dbgs() << tri_->getName(i) << " is still in use!\n"; - Error = true; - } - } - if (Error) - llvm_unreachable(0); -#endif - regUse_.clear(); - regUseBackUp_.clear(); - } - - void addRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - ++regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) - ++regUse_[*as]; - } - - void delRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - assert(regUse_[physReg] != 0); - --regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { - assert(regUse_[*as] != 0); - --regUse_[*as]; - } - } - - bool isRegAvail(unsigned physReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - return regUse_[physReg] == 0; - } - - void backUpRegUses() { - regUseBackUp_ = regUse_; - } - - void restoreRegUses() { - regUse_ = regUseBackUp_; - } - - /// - /// Register handling helpers. - /// - - /// getFreePhysReg - return a free physical register for this virtual - /// register interval if we have one, otherwise return 0. - unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs); - - /// getFirstNonReservedPhysReg - return the first non-reserved physical - /// register in the register class. 
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); - assert(!O.empty() && "All registers reserved?!"); - return O.front(); - } - - void ComputeRelatedRegClasses(); - - template <typename ItTy> - void printIntervals(const char* const str, ItTy i, ItTy e) const { - DEBUG({ - if (str) - dbgs() << str << " intervals:\n"; - - for (; i != e; ++i) { - dbgs() << '\t' << *i->first << " -> "; - - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) - reg = vrm_->getPhys(reg); - - dbgs() << tri_->getName(reg) << '\n'; - } - }); - } - }; - char RALinScan::ID = 0; -} - -INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) - -void RALinScan::ComputeRelatedRegClasses() { - // First pass, add all reg classes to the union, and determine at least one - // reg class that each register is in. - bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), - E = tri_->regclass_end(); RCI != E; ++RCI) { - RelatedRegClasses.insert(*RCI); - for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); - I != E; ++I) { - HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - - const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; - if (PRC) { - // Already processed this register. Just make sure we know that - // multiple register classes share a register. - RelatedRegClasses.unionSets(PRC, *RCI); - } else { - PRC = *RCI; - } - } - } - - // Second pass, now that we know conservatively what register classes each reg - // belongs to, add info about aliases. We don't need to do this for targets - // without register aliases. - if (HasAliases) - for (DenseMap<unsigned, const TargetRegisterClass*>::iterator - I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); - I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { - const TargetRegisterClass *AliasClass = - OneClassForEachPhysReg.lookup(*AS); - if (AliasClass) - RelatedRegClasses.unionSets(I->second, AliasClass); - } -} - -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try -/// allocate the definition the same register as the source register if the -/// register is not defined during live time of the interval. If the interval is -/// killed by a copy, try to use the destination register. This eliminates a -/// copy. This is used to coalesce copies which were not coalesced away before -/// allocation either due to dest and src being in different register classes or -/// because the coalescer was overly conservative. -unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - unsigned Preference = vrm_->getRegAllocPref(cur.reg); - if ((Preference && Preference == Reg) || !cur.containsOneValue()) - return Reg; - - // We cannot handle complicated live ranges. Simple linear stuff only. 
- if (cur.ranges.size() != 1) - return Reg; - - const LiveRange &range = cur.ranges.front(); - - VNInfo *vni = range.valno; - if (vni->isUnused() || !vni->def.isValid()) - return Reg; - - unsigned CandReg; - { - MachineInstr *CopyMI; - if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) - // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->getOperand(1).getReg(); - else if (TrivCoalesceEnds && - (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && - CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) - // Only used by a copy, try to extend DstReg backwards - CandReg = CopyMI->getOperand(0).getReg(); - else - return Reg; - - // If the target of the copy is a sub-register then don't coalesce. - if(CopyMI->getOperand(0).getSubReg()) - return Reg; - } - - if (TargetRegisterInfo::isVirtualRegister(CandReg)) { - if (!vrm_->isAssignedReg(CandReg)) - return Reg; - CandReg = vrm_->getPhys(CandReg); - } - if (Reg == CandReg) - return Reg; - - const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(CandReg)) - return Reg; - - if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) - return Reg; - - // Try to coalesce. - DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, CandReg); - - ++NumCoalesce; - return CandReg; -} - -bool RALinScan::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - li_ = &getAnalysis<LiveIntervals>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); - RegClassInfo.runOnMachineFunction(fn); - - // We don't run the coalescer here because we have no reason to - // interact with it. If the coalescer requires interaction, it - // won't do anything. If it doesn't require interaction, we assume - // it was run as a separate pass. - - // If this is the first function compiled, compute the related reg classes. - if (RelatedRegClasses.empty()) - ComputeRelatedRegClasses(); - - // Also resize register usage trackers. - initRegUses(); - - vrm_ = &getAnalysis<VirtRegMap>(); - if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(*this, *mf_, *vrm_)); - - initIntervalSets(); - - linearScan(); - - // Rewrite spill code and update the PhysRegsUsed set. - rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); - - // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_); - - assert(unhandled_.empty() && "Unhandled live intervals remain!"); - - finalizeRegUses(); - - fixed_.clear(); - active_.clear(); - inactive_.clear(); - handled_.clear(); - NextReloadMap.clear(); - DowngradedRegs.clear(); - DowngradeMap.clear(); - spiller_.reset(0); - - return true; -} - -/// initIntervalSets - initialize the interval sets. 
-/// -void RALinScan::initIntervalSets() -{ - assert(unhandled_.empty() && fixed_.empty() && - active_.empty() && inactive_.empty() && - "interval sets should be empty on initialization"); - - handled_.reserve(li_->getNumIntervals()); - - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { - mri_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } - } else { - if (i->second->empty()) { - assignRegOrStackSlotAtInterval(i->second); - } - else - unhandled_.push(i->second); - } - } -} - -void RALinScan::linearScan() { - // linear scan algorithm - DEBUG({ - dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " - << mf_->getFunction()->getName() << '\n'; - printIntervals("fixed", fixed_.begin(), fixed_.end()); - }); - - while (!unhandled_.empty()) { - // pick the interval with the earliest start point - LiveInterval* cur = unhandled_.top(); - unhandled_.pop(); - ++NumIters; - DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - - assert(!cur->empty() && "Empty interval in unhandled set."); - - processActiveIntervals(cur->beginIndex()); - processInactiveIntervals(cur->beginIndex()); - - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - - // Allocating a virtual register. try to find a free - // physical register or spill an interval (possibly this one) in order to - // assign it one. - assignRegOrStackSlotAtInterval(cur); - - DEBUG({ - printIntervals("active", active_.begin(), active_.end()); - printIntervals("inactive", inactive_.begin(), inactive_.end()); - }); - } - - // Expire any remaining active intervals - while (!active_.empty()) { - IntervalPtr &IP = active_.back(); - unsigned reg = IP.first->reg; - DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - active_.pop_back(); - } - - // Expire any remaining inactive intervals - DEBUG({ - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - dbgs() << "\tinterval " << *i->first << " expired\n"; - }); - inactive_.clear(); - - // Add live-ins to every BB except for entry. Also perform trivial coalescing. - MachineFunction::iterator EntryMBB = mf_->begin(); - SmallVector<MachineBasicBlock*, 8> LiveInMBBs; - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - LiveInterval &cur = *i->second; - unsigned Reg = 0; - bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); - if (isPhys) - Reg = cur.reg; - else if (vrm_->isAssignedReg(cur.reg)) - Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); - if (!Reg) - continue; - // Ignore splited live intervals. 
- if (!isPhys && vrm_->getPreSplitReg(cur.reg)) - continue; - - for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); - I != E; ++I) { - const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { - for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - LiveInMBBs[i]->addLiveIn(Reg); - } - LiveInMBBs.clear(); - } - } - } - - DEBUG(dbgs() << *vrm_); - - // Look for physical registers that end up not being allocated even though - // register allocator had to spill other registers in its register class. - if (!vrm_->FindUnusedRegisters(li_)) - return; -} - -/// processActiveIntervals - expire old intervals and move non-overlapping ones -/// to the inactive list. -void RALinScan::processActiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing active intervals:\n"); - - for (unsigned i = 0, e = active_.size(); i != e; ++i) { - LiveInterval *Interval = active_[i].first; - LiveInterval::iterator IntervalPos = active_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - - } else if (IntervalPos->start > CurPoint) { - // Move inactive intervals to inactive list. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - // add to inactive. - inactive_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - active_[i].second = IntervalPos; - } - } -} - -/// processInactiveIntervals - expire old intervals and move overlapping -/// ones to the active list. -void RALinScan::processInactiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); - - for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { - LiveInterval *Interval = inactive_[i].first; - LiveInterval::iterator IntervalPos = inactive_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else if (IntervalPos->start <= CurPoint) { - // move re-activated intervals in active list - DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - addRegUse(reg); - // add to active - active_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. 
- inactive_[i].second = IntervalPos; - } - } -} - -/// updateSpillWeights - updates the spill weights of the specifed physical -/// register and its weight. -void RALinScan::updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC) { - SmallSet<unsigned, 4> Processed; - SmallSet<unsigned, 4> SuperAdded; - SmallVector<unsigned, 4> Supers; - Weights[reg] += weight; - Processed.insert(reg); - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { - Weights[*as] += weight; - Processed.insert(*as); - if (tri_->isSubRegister(*as, reg) && - SuperAdded.insert(*as) && - RC->contains(*as)) { - Supers.push_back(*as); - } - } - - // If the alias is a super-register, and the super-register is in the - // register class we are trying to allocate. Then add the weight to all - // sub-registers of the super-register even if they are not aliases. - // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be chosen - // as a spill candidate since spilling bh doesn't make ebx available. - for (unsigned i = 0, e = Supers.size(); i != e; ++i) { - for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) - if (!Processed.count(*sr)) - Weights[*sr] += weight; - } -} - -static -RALinScan::IntervalPtrs::iterator -FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { - for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); - I != E; ++I) - if (I->first == LI) return I; - return IP.end(); -} - -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, - SlotIndex Point){ - for (unsigned i = 0, e = V.size(); i != e; ++i) { - RALinScan::IntervalPtr &IP = V[i]; - LiveInterval::iterator I = std::upper_bound(IP.first->begin(), - IP.second, Point); - if (I != IP.first->begin()) --I; - IP.second = I; - } -} - -/// getConflictWeight - Return the number of conflicts between cur -/// live interval and defs and uses of Reg weighted by loop depthes. -static -float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, - MachineRegisterInfo *mri_, - MachineLoopInfo *loopInfo) { - float Conflicts = 0; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (cur->liveAt(li_->getInstructionIndex(MI))) { - unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += std::pow(10.0f, (float)loopDepth); - } - } - return Conflicts; -} - -/// findIntervalsToSpill - Determine the intervals to spill for the -/// specified interval. It's passed the physical registers whose spill -/// weight is the lowest among all the registers whose live intervals -/// conflict with the interval. -void RALinScan::findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals) { - // We have figured out the *best* register to spill. But there are other - // registers that are pretty good as well (spill weight within 3%). Spill - // the one that has fewest defs and uses that conflict with cur. - float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; - SmallVector<LiveInterval*, 8> SLIs[3]; - - DEBUG({ - dbgs() << "\tConsidering " << NumCands << " candidates: "; - for (unsigned i = 0; i != NumCands; ++i) - dbgs() << tri_->getName(Candidates[i].first) << " "; - dbgs() << "\n"; - }); - - // Calculate the number of conflicts of each candidate. 
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second-1)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - // Which is the best candidate? - unsigned BestCandidate = 0; - float MinConflicts = Conflicts[0]; - for (unsigned i = 1; i != NumCands; ++i) { - if (Conflicts[i] < MinConflicts) { - BestCandidate = i; - MinConflicts = Conflicts[i]; - } - } - - std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), - std::back_inserter(SpillIntervals)); -} - -namespace { - struct WeightCompare { - private: - const RALinScan &Allocator; - - public: - WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} - - typedef std::pair<unsigned, float> RegWeightPair; - bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); - } - }; -} - -static bool weightsAreClose(float w1, float w2) { - if (!NewHeuristic) - return false; - - float diff = w1 - w2; - if (diff <= 0.02f) // Within 0.02f - return true; - return (diff / w2) <= 0.05f; // Within 5%. -} - -LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { - DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg); - if (I == NextReloadMap.end()) - return 0; - return &li_->getInterval(I->second); -} - -void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { - bool isNew = DowngradedRegs.insert(*AS); - (void)isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, *AS)); - } - ++NumDowngrade; -} - -void RALinScan::UpgradeRegister(unsigned Reg) { - if (Reg) { - DowngradedRegs.erase(Reg); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) - DowngradedRegs.erase(*AS); - } -} - -namespace { - struct LISorter { - bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginIndex() < B->beginIndex(); - } - }; -} - -/// assignRegOrStackSlotAtInterval - assign a register if one is available, or -/// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - DEBUG(dbgs() << "\tallocating current interval from " - << RC->getName() << ": "); - - // This is an implicitly defined live interval, just assign any register. - if (cur->empty()) { - unsigned physReg = vrm_->getRegAllocPref(cur->reg); - if (!physReg) - physReg = getFirstNonReservedPhysReg(RC); - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - // Note the register is not really in use. 
- vrm_->assignVirt2Phys(cur->reg, physReg); - return; - } - - backUpRegUses(); - - std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; - SlotIndex StartPosition = cur->beginIndex(); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - // If start of this live interval is defined by a move instruction and its - // source is assigned a physical register that is compatible with the target - // register class, then we should try to assign it the same register. - // This can happen when the move is from a larger register class to a smaller - // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { - VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused() && vni->def.isValid()) { - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - if (CopyMI && CopyMI->isCopy()) { - unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); - unsigned SrcReg = CopyMI->getOperand(1).getReg(); - unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } - } - } - - // For every interval in inactive we overlap with, mark the - // register as not free and update spill weights. - for (IntervalPtrs::const_iterator i = inactive_.begin(), - e = inactive_.end(); i != e; ++i) { - unsigned Reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, - // don't check it. - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - cur->overlapsFrom(*i->first, i->second-1)) { - Reg = vrm_->getPhys(Reg); - addRegUse(Reg); - SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); - } - } - - // Speculatively check to see if we can get a register right now. If not, - // we know we won't be able to by adding more constraints. If so, we can - // check to see if it is valid. Doing an exhaustive search of the fixed_ list - // is very bad (it contains all callee clobbered registers for any functions - // with a call), so we want to avoid doing that if possible. - unsigned physReg = getFreePhysReg(cur); - unsigned BestPhysReg = physReg; - if (physReg) { - // We got a register. However, if it's in the fixed_ list, we might - // conflict with it. Check to see if we conflict with it or any of its - // aliases. - SmallSet<unsigned, 8> RegAliases; - for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) - RegAliases.insert(*AS); - - bool ConflictsWithFixed = false; - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { - // Okay, this reg is on the fixed list. Check to see if we actually - // conflict. 
- LiveInterval *I = IP.first; - if (I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - ConflictsWithFixed = true; - break; - } - } - } - } - - // Okay, the register picked by our speculative getFreePhysReg call turned - // out to be in use. Actually add all of the conflicting fixed registers to - // regUse_ so we can do an accurate query. - if (ConflictsWithFixed) { - // For every interval in fixed we overlap with, mark the register as not - // free and update spill weights. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - - const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - unsigned reg = I->reg; - addRegUse(reg); - SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); - } - } - } - - // Using the newly updated regUse_ object, which includes conflicts in the - // future, see if there are any registers available. - physReg = getFreePhysReg(cur); - } - } - - // Restore the physical register tracker, removing information about the - // future. - restoreRegUses(); - - // If we find a free register, we are done: assign this virtual to - // the free physical register and add this interval to the active - // list. - if (physReg) { - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - assert(RC->contains(physReg) && "Invalid candidate"); - vrm_->assignVirt2Phys(cur->reg, physReg); - addRegUse(physReg); - active_.push_back(std::make_pair(cur, cur->begin())); - handled_.push_back(cur); - - // Remember physReg for avoiding a write-after-write hazard in the next - // instruction. - if (AvoidWAWHazard && - tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) - avoidWAW_ = physReg; - - // "Upgrade" the physical register since it has been allocated. - UpgradeRegister(physReg); - if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { - // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. - mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); - DowngradeRegister(cur, physReg); - } - return; - } - DEBUG(dbgs() << "no free registers\n"); - - // Compile the spill weights into an array that is better for scanning. - std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); - for (std::vector<std::pair<unsigned, float> >::iterator - I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, RC); - - // for each interval in active, update spill weights. - for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, RC); - } - - DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); - - // Find a register to spill. 
- float minWeight = HUGE_VALF; - unsigned minReg = 0; - - bool Found = false; - std::vector<std::pair<unsigned,float> > RegsWeights; - ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); - if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - float regWeight = SpillWeights[reg]; - // Skip recently allocated registers and reserved registers. - if (minWeight > regWeight && !isRecentlyUsed(reg)) - Found = true; - RegsWeights.push_back(std::make_pair(reg, regWeight)); - } - - // If we didn't find a register that is spillable, try aliases? - if (!Found) { - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - // No need to worry about if the alias register size < regsize of RC. - // We are going to spill all registers that alias it anyway. - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) - RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); - } - } - - // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); - minReg = RegsWeights[0].first; - minWeight = RegsWeights[0].second; - if (minWeight == HUGE_VALF) { - // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); - if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) { - // Spill a physical register around defs and uses. - if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { - // spillPhysRegAroundRegDefsUses may have invalidated iterator stored - // in fixed_. Reset them. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) - IP.second = I->advanceTo(I->begin(), StartPosition); - } - - DowngradedRegs.clear(); - assignRegOrStackSlotAtInterval(cur); - } else { - assert(false && "Ran out of registers during register allocation!"); - report_fatal_error("Ran out of registers during register allocation!"); - } - return; - } - } - - // Find up to 3 registers to consider as spill candidates. - unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; - while (LastCandidate > 1) { - if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) - break; - --LastCandidate; - } - - DEBUG({ - dbgs() << "\t\tregister(s) with min weight(s): "; - - for (unsigned i = 0; i != LastCandidate; ++i) - dbgs() << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"; - }); - - // If the current has the minimum weight, we need to spill it and - // add any added intervals back to unhandled, and restart - // linearscan. - if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> added; - LiveRangeEdit LRE(*cur, added); - spiller_->spill(LRE); - - std::sort(added.begin(), added.end(), LISorter()); - if (added.empty()) - return; // Early exit if all spills were folded. - - // Merge added with unhandled. Note that we have already sorted - // intervals returned by addIntervalsForSpills by their starting - // point. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. 
- MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } - return; - } - - ++NumBacktracks; - - // Push the current interval back to unhandled since we are going - // to re-run at least this iteration. Since we didn't modify it it - // should go back right in the front of the list - unhandled_.push(cur); - - assert(TargetRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - - // We spill all intervals aliasing the register with - // minimum weight, rollback to the interval with the earliest - // start point and let the linear scan algorithm run again - SmallVector<LiveInterval*, 8> spillIs; - - // Determine which intervals have to be spilled. - findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - - // Set of spilled vregs (used later to rollback properly) - SmallSet<unsigned, 8> spilled; - - // The earliest start of a Spilled interval indicates up to where - // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); - SlotIndex earliestStart = spillIs[0]->beginIndex(); - - // Spill live intervals of virtual regs mapped to the physical register we - // want to clear (and its aliases). We only spill those that overlap with the - // current interval as the rest do not affect its allocation. we also keep - // track of the earliest start of all spilled live intervals since this will - // mark our rollback point. - SmallVector<LiveInterval*, 8> added; - while (!spillIs.empty()) { - LiveInterval *sli = spillIs.back(); - spillIs.pop_back(); - DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); - if (sli->beginIndex() < earliestStart) - earliestStart = sli->beginIndex(); - LiveRangeEdit LRE(*sli, added, 0, &spillIs); - spiller_->spill(LRE); - spilled.insert(sli->reg); - } - - // Include any added intervals in earliestStart. - for (unsigned i = 0, e = added.size(); i != e; ++i) { - SlotIndex SI = added[i]->beginIndex(); - if (SI < earliestStart) - earliestStart = SI; - } - - DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); - - // Scan handled in reverse order up to the earliest start of a - // spilled live interval and undo each one, restoring the state of - // unhandled. - while (!handled_.empty()) { - LiveInterval* i = handled_.back(); - // If this interval starts before t we are done. - if (!i->empty() && i->beginIndex() < earliestStart) - break; - DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); - handled_.pop_back(); - - // When undoing a live interval allocation we must know if it is active or - // inactive to properly update regUse_ and the VirtRegMap. 
- IntervalPtrs::iterator it; - if ((it = FindIntervalInVector(active_, i)) != active_.end()) { - active_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - delRegUse(vrm_->getPhys(i->reg)); - vrm_->clearVirt(i->reg); - } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { - inactive_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - vrm_->clearVirt(i->reg); - } else { - assert(TargetRegisterInfo::isVirtualRegister(i->reg) && - "Can only allocate virtual registers!"); - vrm_->clearVirt(i->reg); - unhandled_.push(i); - } - - DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg); - if (ii == DowngradeMap.end()) - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been - // undone as well. - mri_->setRegAllocationHint(i->reg, 0, 0); - else { - UpgradeRegister(ii->second); - } - } - - // Rewind the iterators in the active, inactive, and fixed lists back to the - // point we reverted to. - RevertVectorIteratorsTo(active_, earliestStart); - RevertVectorIteratorsTo(inactive_, earliestStart); - RevertVectorIteratorsTo(fixed_, earliestStart); - - // Scan the rest and undo each interval that expired after t and - // insert it in active (the next iteration of the algorithm will - // put it in inactive if required) - for (unsigned i = 0, e = handled_.size(); i != e; ++i) { - LiveInterval *HI = handled_[i]; - if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginIndex())) { - DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); - active_.push_back(std::make_pair(HI, HI->begin())); - assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - addRegUse(vrm_->getPhys(HI->reg)); - } - } - - // Merge added with unhandled. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - std::sort(added.begin(), added.end(), LISorter()); - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } -} - -unsigned RALinScan::getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs) { - unsigned FreeReg = 0; - unsigned FreeRegInactiveCount = 0; - - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg); - // Resolve second part of the hint (if possible) given the current allocation. 
- unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) - physReg = vrm_->getPhys(physReg); - - ArrayRef<unsigned> Order; - if (Hint.first) - Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); - else - Order = RegClassInfo.getOrder(RC); - - assert(!Order.empty() && "No allocatable register in this register class!"); - - // Scan for the first available register. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - // Skip recently allocated registers. - if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - if (FreeReg < inactiveCounts.size()) - FreeRegInactiveCount = inactiveCounts[FreeReg]; - else - FreeRegInactiveCount = 0; - break; - } - } - - // If there are no free regs, or if this reg has the max inactive count, - // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { - // Remember what register we picked so we can skip it next time. - if (FreeReg != 0) recordRecentlyUsed(FreeReg); - return FreeReg; - } - - // Continue scanning the registers, looking for the one with the highest - // inactive count. Alkis found that this reduced register pressure very - // slightly on X86 (in rev 1.94 of this file), though this should probably be - // reevaluated now. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && - (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - FreeRegInactiveCount = inactiveCounts[Reg]; - if (FreeRegInactiveCount == MaxInactiveCount) - break; // We found the one with the max inactive count. - } - } - - // Remember what register we picked so we can skip it next time. - recordRecentlyUsed(FreeReg); - - return FreeReg; -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector<unsigned, 256> inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = mri_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); - } - } - - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. 
- unsigned Preference = vrm_->getRegAllocPref(cur->reg); - if (Preference) { - DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); - if (isRegAvail(Preference) && - RC->contains(Preference)) - return Preference; - } - - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); -} - -FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RALinScan(); -} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 0d2cf2d..845ee12 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,10 +31,11 @@ #define DEBUG_TYPE "regalloc" +#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" +#include "Spiller.h" #include "Splitter.h" #include "VirtRegMap.h" -#include "VirtRegRewriter.h" #include "RegisterCoalescer.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -132,6 +133,7 @@ private: MachineRegisterInfo *mri; RenderMachineFunction *rmf; + std::auto_ptr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -141,10 +143,6 @@ private: /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, @@ -488,29 +486,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() { } } -void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, - MachineRegisterInfo* mri) { - int stackSlot = vrm->getStackSlot(spilled->reg); - - if (stackSlot == VirtRegMap::NO_STACK_SLOT) { - return; - } - - const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); - LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); - - VNInfo *vni; - if (stackInterval.getNumValNums() != 0) { - vni = stackInterval.getValNumInfo(0); - } else { - vni = stackInterval.getNextValue( - SlotIndex(), 0, lss->getVNInfoAllocator()); - } - - LiveInterval &rhsInterval = lis->getInterval(spilled->reg); - stackInterval.MergeRangesInAsValue(rhsInterval, vni); -} - bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Solution &solution) { // Set to true if we have any spills @@ -535,22 +510,16 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - const LiveInterval* spillInterval = &lis->getInterval(vreg); - double oldWeight = spillInterval->weight; - rmf->rememberUseDefs(spillInterval); - std::vector<LiveInterval*> newSpills = - lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); - addStackInterval(spillInterval, mri); - rmf->rememberSpills(spillInterval, newSpills); - - (void) oldWeight; + SmallVector<LiveInterval*, 8> newSpills; + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills); + spiller->spill(LRE); + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " - << oldWeight << ", New vregs: "); + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
- for (std::vector<LiveInterval*>::const_iterator - itr = newSpills.begin(), end = newSpills.end(); + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); DEBUG(dbgs() << (*itr)->reg << " "); @@ -560,7 +529,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !newSpills.empty(); + anotherRoundNeeded |= !LRE.empty(); } else { assert(false && "Unknown allocation option."); } @@ -650,6 +619,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); @@ -698,9 +668,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter - std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); - - rewriter->runOnMachineFunction(*mf, *vrm, lis); + vrm->rewrite(lis->getSlotIndexes()); return true; } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9b414d6..22d6a3b 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -423,7 +423,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -438,7 +438,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); // The live range might not exist after fun with physreg coalescing. if (ALR == IntA.end()) return false; @@ -625,7 +625,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -641,7 +641,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. 
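// Sketch (not from this patch): the RegAllocPBQP hunks above drop
// LiveIntervals::addIntervalsForSpills()/addStackInterval() in favour of the
// shared Spiller driven through a LiveRangeEdit, and replace the old
// VirtRegRewriter with vrm->rewrite(lis->getSlotIndexes()).  The helper below
// only restates the new spill pattern using calls visible in this diff; the
// name spillOneVReg and the std::set worklist are invented for illustration.
#include "LiveRangeEdit.h"
#include "Spiller.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/ADT/SmallVector.h"
#include <set>

static void spillOneVReg(unsigned vreg, llvm::LiveIntervals &lis,
                         llvm::Spiller &spiller,
                         std::set<unsigned> &vregsToAlloc) {
  using namespace llvm;
  SmallVector<LiveInterval*, 8> newIntervals;
  LiveRangeEdit LRE(lis.getInterval(vreg), newIntervals);
  spiller.spill(LRE);  // the inline spiller inserts the reloads/stores and
                       // maintains the stack-slot interval, which is why
                       // addStackInterval() could be deleted above.
  for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); I != E; ++I)
    vregsToAlloc.insert((*I)->reg);  // new ranges need another allocation round
}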
- VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); // If other defs can reach uses of this def, then it's not safe to perform @@ -747,7 +747,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -765,7 +765,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // This copy will become a noop. If it's defining a new val#, merge it into // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); + SlotIndex DefIdx = UseIdx.getRegSlot(); VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; @@ -799,7 +799,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, MachineInstr *CopyMI) { - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -887,7 +887,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt = SrcInt; SrcInt = 0; - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); @@ -1013,7 +1013,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, /// the val# it defines. If the live interval becomes empty, remove it as well. bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -1023,7 +1023,7 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); if (LIS->hasInterval(DstReg)) { LiveInterval &LI = LIS->getInterval(DstReg); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) @@ -1936,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 8b02ec4..448f2fb 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -560,12 +560,13 @@ namespace llvm { // For uses/defs recorded use/def indexes override current liveness and // instruction operands (Only for the interval which records the indexes). 
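// Sketch (not from this patch): the RegisterCoalescer hunks above, and most of
// the CodeGen hunks that follow, are a mechanical rename of the SlotIndex
// accessors.  The correspondence visible in this diff (here and in later hunks):
//
//   Idx.getDefIndex()    ->  Idx.getRegSlot()
//   Idx.getUseIndex()    ->  Idx.getRegSlot(true)
//   Idx.getStoreIndex()  ->  Idx.getDeadSlot()
//   SlotIndex::LOAD      ->  SlotIndex::Slot_Block
//
// A minimal usage example built only from calls that appear above; LIS, CopyMI,
// IntA and IntB stand for the usual LiveIntervals analysis, a copy instruction
// and the two intervals it joins.
static void slotQueryExample(llvm::LiveIntervals *LIS,
                             llvm::MachineInstr *CopyMI,
                             llvm::LiveInterval &IntA,
                             llvm::LiveInterval &IntB) {
  using namespace llvm;
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);                  // value the copy defines
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); // value the copy reads
  (void)BValNo; (void)AValNo;
}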
- if (i.isUse() || i.isDef()) { + // FIXME: This is all wrong, uses and defs share the same slots. + if (i.isEarlyClobber() || i.isRegister()) { UseDefs::const_iterator udItr = useDefs.find(li); if (udItr != useDefs.end()) { const SlotSet &slotSet = udItr->second; if (slotSet.count(i)) { - if (i.isUse()) { + if (i.isEarlyClobber()) { return Used; } // else @@ -586,9 +587,9 @@ namespace llvm { return AliveStack; } } else { - if (i.isDef() && mi->definesRegister(li->reg, tri)) { + if (i.isRegister() && mi->definesRegister(li->reg, tri)) { return Defined; - } else if (i.isUse() && mi->readsRegister(li->reg)) { + } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { return Used; } else { if (vrm == 0 || @@ -804,7 +805,7 @@ namespace llvm { os << indent + s(2) << "<tr height=6ex>\n"; // Render the code column. - if (i.isLoad()) { + if (i.isBlock()) { MachineBasicBlock *mbb = sis->getMBBFromIndex(i); mi = sis->getInstructionFromIndex(i); @@ -823,7 +824,7 @@ namespace llvm { } os << indent + s(4) << "</td>\n"; } else { - i = i.getStoreIndex(); // <- Will be incremented to the next index. + i = i.getDeadSlot(); // <- Will be incremented to the next index. continue; } } @@ -952,10 +953,10 @@ namespace llvm { rItr != rEnd; ++rItr) { const MachineInstr *mi = &*rItr; if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); } if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); } } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8b28ea9..4384db8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -279,7 +279,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -944,8 +944,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. 
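// Sketch (not from this patch): the DAGCombiner hunk above renames the combine
// levels so that each value names the legalization phase that has already run.
// The enum itself is declared outside this excerpt; based on the substitutions
// here and in the SelectionDAGISel hunk further down, it presumably reads:
enum CombineLevel {
  BeforeLegalizeTypes,     // was Unrestricted
  AfterLegalizeTypes,      // was NoIllegalTypes
  AfterLegalizeVectorOps,  // split out of the old NoIllegalOperations
  AfterLegalizeDAG         // post-legalization combine runs
};
// With this ordering the flags fall out of simple comparisons, as the hunk
// shows: LegalTypes = Level >= AfterLegalizeTypes and
// LegalOperations = Level >= AfterLegalizeVectorOps.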
WorkList.reserve(DAG.allnodes_size()); @@ -5471,7 +5471,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5496,7 +5496,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5875,7 +5875,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6007,7 +6007,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0e864fe..0bca55f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,14 +134,39 @@ private: void ExpandNode(SDNode *Node); void PromoteNode(SDNode *Node); - // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void ForgetNode(SDNode *N) { LegalizedNodes.erase(N); if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) ++LegalizePosition; } +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -267,7 +292,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. 
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, - SelectionDAG::DAGUpdateListener *DUL) { + SelectionDAGLegalize *DAGLegalize) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -284,8 +309,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } // Do a (aligned) store to a stack slot, then copy from the stack slot @@ -349,8 +373,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], Stores.size()); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } assert(ST->getMemoryVT().isInteger() && @@ -382,8 +405,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. @@ -824,7 +846,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(Node, NewNode, this); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - DAG.RemoveDeadNode(Node, this); + ReplacedNode(Node); Node = NewNode; } switch (Action) { @@ -846,7 +868,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - DAG.RemoveDeadNode(Node, this); + ReplacedNode(Node); } return; } @@ -881,7 +903,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); Tmp3 = SDValue(Node, 0); Tmp4 = SDValue(Node, 1); @@ -920,10 +941,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } return; } @@ -1058,8 +1081,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp2, LD->getOffset()); Tmp1 = SDValue(Node, 0); Tmp2 = SDValue(Node, 1); @@ -1135,8 +1156,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // Since loads produce two values, make sure to remember that we legalized // both of them. 
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } break; } case ISD::STORE: { @@ -1149,17 +1174,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - DAG.ReplaceAllUsesWith(ST, OptStore, this); - DAG.RemoveDeadNode(ST, this); + ReplaceNode(ST, OptStore); break; } { Tmp3 = ST->getValue(); - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp3, Tmp2, - ST->getOffset()); - EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: assert(0 && "This action is not supported yet!"); @@ -1176,10 +1196,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); - DAG.RemoveDeadNode(Node, this); - } + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); @@ -1189,8 +1207,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1212,8 +1229,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1268,14 +1284,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // The order of the stores doesn't matter. 
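// Sketch (not from this patch): each LegalizeDAG hunk above collapses the
// repeated pair
//   DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this);
//   DAG.RemoveDeadNode(Node, this);
// into one call to the ReplaceNode() helper added near the top of this file's
// diff.  A typical call site after the change, excerpted from the custom
// store-lowering hunk (Lowered renamed from Tmp1 for clarity):
SDValue Lowered = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Lowered.getNode())
  ReplaceNode(SDValue(Node, 0), Lowered);  // RAUW, dead-node cleanup and
                                           // cache invalidation in one call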
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, - ST->getOffset()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: @@ -1289,10 +1299,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } break; case TargetLowering::Custom: - DAG.ReplaceAllUsesWith(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG), - this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1304,8 +1312,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -3376,8 +3383,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Scalars[0], Scalars.size()); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } case ISD::GLOBAL_OFFSET_TABLE: @@ -3394,10 +3400,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) { - DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { @@ -3531,10 +3535,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) { - DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2964bd3..5cbce3f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -487,7 +487,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. 
{ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -515,7 +515,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -540,7 +540,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -562,7 +562,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index ca79caf..6a7666e 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. - SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + SlotIndex::Slot_Block))); ++functionSize; } @@ -97,7 +98,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -166,7 +168,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "LudS"[getSlot()]; + os << entry().getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index b6bbcd7..9083804 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -29,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, inline_ }; + enum SpillerName { trivial, inline_ }; } static cl::opt<SpillerName> @@ -37,10 +37,9 @@ spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), - cl::init(standard)); + cl::init(trivial)); // Spiller virtual destructor implementation. 
Spiller::~Spiller() {} @@ -140,8 +139,7 @@ protected: tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, loadInstr); + lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); @@ -154,8 +152,7 @@ protected: true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, storeInstr); + lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); @@ -188,54 +185,12 @@ public: } // end anonymous namespace -namespace { - -/// Falls back on LiveIntervals::addIntervalsForSpills. -class StandardSpiller : public Spiller { -protected: - MachineFunction *mf; - LiveIntervals *lis; - LiveStacks *lss; - MachineLoopInfo *loopInfo; - VirtRegMap *vrm; -public: - StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : mf(&mf), - lis(&pass.getAnalysis<LiveIntervals>()), - lss(&pass.getAnalysis<LiveStacks>()), - loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), - vrm(&vrm) {} - - /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveRangeEdit &LRE) { - std::vector<LiveInterval*> added = - lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), - loopInfo, *vrm); - LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), - added.begin(), added.end()); - - // Update LiveStacks. - int SS = vrm->getStackSlot(LRE.getReg()); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); - LiveInterval &SI = lss->getOrCreateInterval(SS, RC); - if (!SI.hasAtLeastOneValue()) - SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); - } -}; - -} // end anonymous namespace - llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { switch (spillerOpt) { default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); - case standard: return new StandardSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 6362780..751d604 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -112,7 +112,7 @@ void SplitAnalysis::analyzeUses() { I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; ++I) if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -366,14 +366,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. 
InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -393,7 +393,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -421,7 +421,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getDefIndex(); + .getRegSlot(); ++NumCopies; } @@ -586,7 +586,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); - assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && "Parent changes value in extended range"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); @@ -640,7 +640,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -958,7 +958,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); if (MO.isDef() || MO.isUndef()) - Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isEarlyClobber()); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); @@ -981,7 +981,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (!Edit->getParent().liveAt(Idx)) continue; } else - Idx = Idx.getUseIndex(); + Idx = Idx.getRegSlot(true); getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); @@ -994,8 +994,8 @@ void SplitEditor::deleteRematVictims() { LiveInterval *LI = *I; for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { - // Dead defs end at the store slot. - if (LII->end != LII->valno->def.getNextSlot()) + // Dead defs end at the dead slot. 
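// Sketch (not from this patch): the SplitKit hunks above change the trivial
// range for a value from [Def, Def.getNextSlot()) to [Def, Def.getDeadSlot()),
// so a def that is never read now ends exactly at its own dead slot.  That is
// also the test deleteRematVictims() relies on below, restated as a helper:
static bool isDeadDef(const llvm::LiveRange &LR) {
  // A live range that stops at the dead slot of its defining instruction was
  // never read; the defining instruction is a candidate for deletion.
  return LR.end == LR.valno->def.getDeadSlot();
}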
+ if (LII->end != LII->valno->def.getDeadSlot()) continue; MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); assert(MI && "Missing instruction for dead def"); diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 77973b7..16cf9b8 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -141,7 +141,7 @@ namespace llvm { ls.lis->InsertMachineInstrInMaps(copy); - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); VNInfo *newVal = getNewVNI(preHeaderRange->valno); newVal->def = copyDefIdx; @@ -175,7 +175,7 @@ namespace llvm { ls.lis->InsertMachineInstrInMaps(copy); - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); // Blow away output range definition. outRange->valno->def = ls.lis->getInvalidIndex(); @@ -216,13 +216,13 @@ namespace llvm { SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); if (instr.modifiesRegister(li.reg, 0)) { LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getDefIndex()); + li.getLiveRangeContaining(instrIdx.getRegSlot()); if (defRange != 0) // May have caught this already. copyRange(*defRange); } if (instr.readsRegister(li.reg, 0)) { LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getUseIndex()); + li.getLiveRangeContaining(instrIdx.getRegSlot(true)); if (useRange != 0) { // May have caught this already. copyRange(*useRange); } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 57cbe1b..f8177a2 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -40,29 +40,17 @@ DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, cl::desc("Suppress slot sharing during stack coloring")); -static cl::opt<bool> -ColorWithRegsOpt("color-ss-with-regs", - cl::init(false), cl::Hidden, - cl::desc("Color stack slots with free registers")); - - static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); -STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); -STATISTIC(NumLoadElim, "Number of loads eliminated"); -STATISTIC(NumStoreElim, "Number of stores eliminated"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { bool ColorWithRegs; LiveStacks* LS; - VirtRegMap* VRM; MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; const MachineLoopInfo *loopInfo; // SSIntervals - Spill slot intervals. 
@@ -127,22 +115,8 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg); void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF); - bool PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - bool PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF); - bool AllMemRefsCanBeUnfolded(int SS); bool RemoveDeadStores(MachineBasicBlock* MBB); }; } // end anonymous namespace @@ -248,79 +222,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { return false; } -/// ColorSlotsWithFreeRegs - If there are any free registers available, try -/// replacing spill slots references with registers instead. -bool -StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg) { - if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) - return false; - - bool Changed = false; - DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); - if (!UsedColors[SS] || li->weight < 20) - // If the weight is < 20, i.e. two references in a loop with depth 1, - // don't bother with it. - continue; - - // These slots allow to share the same registers. - bool AllColored = true; - SmallVector<unsigned, 4> ColoredRegs; - for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { - int RSS = RevMap[SS][j]; - const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); - // If it's not colored to another stack slot, try coloring it - // to a "free" register. - if (!RC) { - AllColored = false; - continue; - } - unsigned Reg = VRM->getFirstUnusedRegister(RC); - if (!Reg) { - AllColored = false; - continue; - } - if (!AllMemRefsCanBeUnfolded(RSS)) { - AllColored = false; - continue; - } else { - DEBUG(dbgs() << "Assigning fi#" << RSS << " to " - << TRI->getName(Reg) << '\n'); - ColoredRegs.push_back(Reg); - SlotMapping[RSS] = Reg; - SlotIsReg.set(RSS); - Changed = true; - } - } - - // Register and its sub-registers are no longer free. - while (!ColoredRegs.empty()) { - unsigned Reg = ColoredRegs.back(); - ColoredRegs.pop_back(); - VRM->setRegisterUsed(Reg); - // If reg is a callee-saved register, it will have to be spilled in - // the prologue. - MRI->setPhysRegUsed(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - VRM->setRegisterUsed(*AS); - MRI->setPhysRegUsed(*AS); - } - } - // This spill slot is dead after the rewrites - if (AllColored) { - MFI->RemoveStackObject(SS); - ++NumEliminated; - } - } - DEBUG(dbgs() << '\n'); - - return Changed; -} - /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. 
/// int StackSlotColoring::ColorSlot(LiveInterval *li) { @@ -372,7 +273,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector<int, 16> SlotMapping(NumObjs, -1); SmallVector<float, 16> SlotWeights(NumObjs, 0.0); SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); - BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); DEBUG(dbgs() << "Color spill slot intervals:\n"); @@ -404,31 +304,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << '\n'); #endif - // Can we "color" a stack slot with a unused register? - Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg); - if (!Changed) return false; // Rewrite all MO_FrameIndex operands. SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { - bool isReg = SlotIsReg[SS]; int NewFI = SlotMapping[SS]; - if (NewFI == -1 || (NewFI == (int)SS && !isReg)) + if (NewFI == -1 || (NewFI == (int)SS)) continue; - const TargetRegisterClass *RC = LS->getIntervalRegClass(SS); SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - if (!isReg) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); - else { - // Rewrite to use a register instead. - unsigned MBBId = RefMIs[i]->getParent()->getNumber(); - SmallSet<unsigned, 4> &Defs = NewDefs[MBBId]; - UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF); - } + RewriteInstruction(RefMIs[i], SS, NewFI, MF); } // Delete unused stack slots. @@ -441,28 +329,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { return true; } -/// AllMemRefsCanBeUnfolded - Return true if all references of the specified -/// spill slot index can be unfolded. -bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) { - MachineInstr *MI = RefMIs[i]; - if (TII->isLoadFromStackSlot(MI, SS) || - TII->isStoreToStackSlot(MI, SS)) - // Restore and spill will become copies. - return true; - if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false)) - return false; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (MO.isFI() && MO.getIndex() != SS) - // If it uses another frameindex, we can, currently* unfold it. - return false; - } - } - return true; -} - /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, @@ -489,179 +355,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, (*I)->setValue(NewSV); } -/// PropagateBackward - Traverse backward and look for the definition of -/// OldReg. If it can successfully update all of the references with NewReg, -/// do so and return true. -bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->begin()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - SmallVector<MachineOperand*, 4> Refs; - while (--MII != MBB->begin()) { - bool FoundDef = false; // Not counting 2address def. 
- - Uses.clear(); - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register def. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isSubregToReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - - if (MO.isUse()) { - Uses.push_back(&MO); - } else { - Refs.push_back(&MO); - if (!MII->isRegTiedToUseOperand(i)) - FoundDef = true; - } - } else if (TRI->regsOverlap(Reg, NewReg)) { - return false; - } else if (TRI->regsOverlap(Reg, OldReg)) { - if (!MO.isUse() || !MO.isKill()) - return false; - } - } - - if (FoundDef) { - // Found non-two-address def. Stop here. - for (unsigned i = 0, e = Refs.size(); i != e; ++i) - Refs[i]->setReg(NewReg); - return true; - } - - // Two-address uses must be updated as well. - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Refs.push_back(Uses[i]); - } - return false; -} - -/// PropagateForward - Traverse forward and look for the kill of OldReg. If -/// it can successfully update all of the uses with NewReg, do so and -/// return true. -bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->end()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - while (++MII != MBB->end()) { - bool FoundKill = false; - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isDef() || MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register use. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - if (MO.isKill()) - FoundKill = true; - - Uses.push_back(&MO); - } else if (TRI->regsOverlap(Reg, NewReg) || - TRI->regsOverlap(Reg, OldReg)) - return false; - } - if (FoundKill) { - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Uses[i]->setReg(NewReg); - return true; - } - } - return false; -} - -/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding -/// folded memory references and replacing those references with register -/// references instead. -void -StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, - const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF) { - MachineBasicBlock *MBB = MI->getParent(); - if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { - if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(dbgs() << "Eliminated load: "); - DEBUG(MI->dump()); - ++NumLoadElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), - DstReg).addReg(Reg); - ++NumRegRepl; - } - - if (!Defs.count(Reg)) { - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. 
- MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { - if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(dbgs() << "Eliminated store: "); - DEBUG(MI->dump()); - ++NumStoreElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(SrcReg); - ++NumRegRepl; - } - - // Remember reg has been defined in MBB. - Defs.insert(Reg); - } else { - SmallVector<MachineInstr*, 4> NewMIs; - bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - (void)Success; // Silence compiler warning. - assert(Success && "Failed to unfold!"); - MachineInstr *NewMI = NewMIs[0]; - MBB->insert(MI, NewMI); - ++NumRegRepl; - - if (NewMI->readsRegister(Reg)) { - if (!Defs.count(Reg)) - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } - MBB->erase(MI); -} /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. If they're both using the same stack slot, then the store is @@ -718,21 +411,16 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { }); MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); LS = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); loopInfo = &getAnalysis<MachineLoopInfo>(); bool Changed = false; unsigned NumSlots = LS->getNumIntervals(); - if (NumSlots < 2) { - if (NumSlots == 0 || !VRM->HasUnusedRegisters()) - // Nothing to do! - return false; - } + if (NumSlots == 0) + // Nothing to do! + return false; // If there are calls to setjmp or sigsetjmp, don't perform stack slot // coloring. The stack could be modified before the longjmp is executed, diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 260cc0e..8c6e44b 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -390,7 +390,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { MachineOperand *LastUse = findLastUse(MBB, SrcReg); assert(LastUse); SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } @@ -745,7 +745,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, // Set the phi-def flag for the VN at this PHI. SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); DestVNI->setIsPHIDef(true); @@ -756,7 +756,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); DestVNI->def = MBBStartIndex; DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getDefIndex(), + PHIIndex.getRegSlot(), DestVNI)); return; } @@ -783,18 +783,18 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LI->getVNInfoAllocator()); CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getDefIndex(), + DestCopyIndex.getRegSlot(), CopyVNI)); // Adjust DestReg's live interval to adjust for its new definition at // CopyInstr. 
LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI); - DestVNI->def = DestCopyIndex.getDefIndex(); + DestVNI->def = DestCopyIndex.getRegSlot(); InsertedDestCopies[CopyReg] = CopyInstr; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d879378..6796312 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -56,11 +57,14 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions re-materialized"); STATISTIC(NumDeletes, "Number of dead instructions deleted"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; @@ -120,6 +124,18 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned Dist); + bool isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, MachineBasicBlock *MBB); + + bool RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, @@ -467,6 +483,33 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } +/// findLocalKill - Look for an instruction below MI in the MBB that kills the +/// specified register. Returns null if there are any other Reg use between the +/// instructions. +static +MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, + MachineInstr *MI, MachineRegisterInfo *MRI, + DenseMap<MachineInstr*, unsigned> &DistanceMap) { + MachineInstr *KillMI = 0; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI == MI || UseMI->getParent() != MBB) + continue; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + continue; + if (!UI.getOperand().isKill()) + return 0; + if (KillMI) + return 0; // -O0 kill markers cannot be trusted? + KillMI = UseMI; + } + + return KillMI; +} + /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. 
 static
@@ -852,6 +895,282 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
   return true;
 }
 
+/// RescheduleMIBelowKill - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool
+TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator &mi,
+                                     MachineBasicBlock::iterator &nmi,
+                                     unsigned Reg) {
+  MachineInstr *MI = &*mi;
+  DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+  if (DI == DistanceMap.end())
+    // Must be created from unfolded load. Don't waste time trying this.
+    return false;
+
+  MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+  if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+    // Don't mess with copies, they may be coalesced later.
+    return false;
+
+  const MCInstrDesc &MCID = KillMI->getDesc();
+  if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
+      MCID.isTerminator())
+    // Don't move past calls, etc.
+    return false;
+
+  unsigned DstReg;
+  if (isTwoAddrUse(*KillMI, Reg, DstReg))
+    return false;
+
+  bool SeenStore;
+  if (!MI->isSafeToMove(TII, AA, SeenStore))
+    return false;
+
+  if (TII->getInstrLatency(InstrItins, MI) > 1)
+    // FIXME: Needs more sophisticated heuristics.
+    return false;
+
+  SmallSet<unsigned, 2> Uses;
+  SmallSet<unsigned, 2> Defs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MO.isDef())
+      Defs.insert(MOReg);
+    else
+      Uses.insert(MOReg);
+  }
+
+  // Move the copies connected to MI down as well.
+  MachineBasicBlock::iterator From = MI;
+  MachineBasicBlock::iterator To = llvm::next(From);
+  while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
+    Defs.insert(To->getOperand(0).getReg());
+    ++To;
+  }
+
+  // Check if the reschedule will not break dependencies.
+  unsigned NumVisited = 0;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  ++KillPos;
+  for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    // DBG_VALUE cannot be counted against the limit.
+    if (OtherMI->isDebugValue())
+      continue;
+    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    const MCInstrDesc &OMCID = OtherMI->getDesc();
+    if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() ||
+        OMCID.isTerminator())
+      // Don't move past calls, etc.
+      return false;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (MO.isDef()) {
+        if (Uses.count(MOReg))
+          // Physical register use would be clobbered.
+          return false;
+        if (!MO.isDead() && Defs.count(MOReg))
+          // May clobber a physical register def.
+          // FIXME: This may be too conservative. It's ok if the instruction
+          // is sunk completely below the use.
+          return false;
+      } else {
+        if (Defs.count(MOReg))
+          return false;
+        if (MOReg != Reg && MO.isKill() && Uses.count(MOReg))
+          // Don't want to extend other live ranges and update kills.
+          return false;
+      }
+    }
+  }
+
+  // Move debug info as well.
+  while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
+    --From;
+
+  // Copies following MI may have been moved as well.
+  nmi = To;
+  MBB->splice(KillPos, MBB, From, To);
+  DistanceMap.erase(DI);
+
+  if (LV) {
+    // Update live variables
+    LV->removeVirtualRegisterKilled(Reg, KillMI);
+    LV->addVirtualRegisterKilled(Reg, MI);
+  } else {
+    for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = KillMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+        continue;
+      MO.setIsKill(false);
+    }
+    MI->addRegisterKilled(Reg, 0);
+  }
+
+  return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+                                              MachineInstr *MI,
+                                              MachineBasicBlock *MBB) {
+  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+         DE = MRI->def_end(); DI != DE; ++DI) {
+    MachineInstr *DefMI = &*DI;
+    if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+      continue;
+    if (DefMI == MI)
+      return true; // MI is defining something KillMI uses
+    DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+    if (DDI == DistanceMap.end())
+      return true;  // Below MI
+    unsigned DefDist = DDI->second;
+    assert(Dist > DefDist && "Visited def already?");
+    if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+      return true;
+  }
+  return false;
+}
+
+/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool
+TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator &mi,
+                                     MachineBasicBlock::iterator &nmi,
+                                     unsigned Reg) {
+  MachineInstr *MI = &*mi;
+  DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+  if (DI == DistanceMap.end())
+    // Must be created from unfolded load. Don't waste time trying this.
+    return false;
+
+  MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+  if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+    // Don't mess with copies, they may be coalesced later.
+    return false;
+
+  unsigned DstReg;
+  if (isTwoAddrUse(*KillMI, Reg, DstReg))
+    return false;
+
+  bool SeenStore;
+  if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+    return false;
+
+  SmallSet<unsigned, 2> Uses;
+  SmallSet<unsigned, 2> Kills;
+  SmallSet<unsigned, 2> Defs;
+  SmallSet<unsigned, 2> LiveDefs;
+  for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = KillMI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (MO.isUse()) {
+      if (!MOReg)
+        continue;
+      if (isDefTooClose(MOReg, DI->second, MI, MBB))
+        return false;
+      Uses.insert(MOReg);
+      if (MO.isKill() && MOReg != Reg)
+        Kills.insert(MOReg);
+    } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      Defs.insert(MOReg);
+      if (!MO.isDead())
+        LiveDefs.insert(MOReg);
+    }
+  }
+
+  // Check if the reschedule will not break dependencies.
+  unsigned NumVisited = 0;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    // DBG_VALUE cannot be counted against the limit.
+    if (OtherMI->isDebugValue())
+      continue;
+    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    const MCInstrDesc &MCID = OtherMI->getDesc();
+    if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
+        MCID.isTerminator())
+      // Don't move past calls, etc.
+      return false;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (MO.isUse()) {
+        if (Defs.count(MOReg))
+          // Moving KillMI can clobber the physical register if the def has
+          // not been seen.
+          return false;
+        if (Kills.count(MOReg))
+          // Don't want to extend other live ranges and update kills.
+          return false;
+      } else {
+        if (Uses.count(MOReg))
+          return false;
+        if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+            LiveDefs.count(MOReg))
+          return false;
+        // Physical register def is seen.
+        Defs.erase(MOReg);
+      }
+    }
+  }
+
+  // Move the old kill above MI, don't forget to move debug info as well.
+  MachineBasicBlock::iterator InsertPos = mi;
+  while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+    --InsertPos;
+  MachineBasicBlock::iterator From = KillMI;
+  MachineBasicBlock::iterator To = llvm::next(From);
+  while (llvm::prior(From)->isDebugValue())
+    --From;
+  MBB->splice(InsertPos, MBB, From, To);
+
+  nmi = llvm::prior(mi); // Backtrack so we process the moved instruction.
+  DistanceMap.erase(DI);
+
+  if (LV) {
+    // Update live variables
+    LV->removeVirtualRegisterKilled(Reg, KillMI);
+    LV->addVirtualRegisterKilled(Reg, MI);
+  } else {
+    for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = KillMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+        continue;
+      MO.setIsKill(false);
+    }
+    MI->addRegisterKilled(Reg, 0);
+  }
+  return true;
+}
+
 /// TryInstructionTransform - For the case where an instruction has a single
 /// pair of tied register operands, attempt some transformations that may
 /// either eliminate the tied operands or improve the opportunities for
@@ -863,17 +1182,18 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineFunction::iterator &mbbi,
                         unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
                         SmallPtrSet<MachineInstr*, 8> &Processed) {
-  const MCInstrDesc &MCID = mi->getDesc();
-  unsigned regA = mi->getOperand(DstIdx).getReg();
-  unsigned regB = mi->getOperand(SrcIdx).getReg();
+  MachineInstr &MI = *mi;
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned regA = MI.getOperand(DstIdx).getReg();
+  unsigned regB = MI.getOperand(SrcIdx).getReg();
   assert(TargetRegisterInfo::isVirtualRegister(regB) &&
          "cannot make instruction into two-address form");
 
   // If regA is dead and the instruction can be deleted, just delete
   // it so it doesn't clobber regB.
-  bool regBKilled = isKilled(*mi, regB, MRI, TII);
-  if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+  bool regBKilled = isKilled(MI, regB, MRI, TII);
+  if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
       DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
     ++NumDeletes;
     return true; // Done with this instruction.
@@ -885,20 +1205,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && mi->getNumOperands() >= 3 && - TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (MCID.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; else if (SrcIdx == SrcOp2) regCIdx = SrcOp1; if (regCIdx != ~0U) { - regC = mi->getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -913,6 +1233,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -928,6 +1255,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + // If this is an instruction with a load folded into it, try unfolding // the load, e.g. avoid this: // movq %rdx, %rcx @@ -940,7 +1274,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = - TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); @@ -950,12 +1284,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction &MF = *mbbi->getParent(); // Unfold the load. - DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, mi, Reg, + if (!TII->unfoldMemoryOperand(MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -986,21 +1320,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); else { @@ -1013,7 +1347,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - mi->eraseFromParent(); + MI.eraseFromParent(); mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1040,6 +1374,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 8a1cdc0..1a78db7 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -19,7 +19,6 @@ #define DEBUG_TYPE "virtregmap" #include "VirtRegMap.h" #include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,12 +31,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include <algorithm> using namespace llvm; @@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - ReMatId = MAX_STACK_SLOT+1; - LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; - Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); - Virt2ReMatIdMap.clear(); Virt2SplitMap.clear(); - Virt2SplitKillMap.clear(); - ReMatMap.clear(); - ImplicitDefed.clear(); - SpillSlotToUsesMap.clear(); - MI2VirtMap.clear(); - SpillPt2VirtMap.clear(); - RestorePt2VirtMap.clear(); - EmergencySpillMap.clear(); - EmergencySpillSlots.clear(); - - SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); - - allocatableRCRegs.clear(); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - allocatableRCRegs.insert(std::make_pair(*I, - TRI->getAllocatableSet(mf, *I))); grow(); - return false; } @@ -93,24 +65,12 @@ void VirtRegMap::grow() { unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); Virt2PhysMap.resize(NumRegs); Virt2StackSlotMap.resize(NumRegs); - Virt2ReMatIdMap.resize(NumRegs); Virt2SplitMap.resize(NumRegs); - Virt2SplitKillMap.resize(NumRegs); - ReMatMap.resize(NumRegs); - ImplicitDefed.resize(NumRegs); } unsigned VirtRegMap::createSpillSlot(const 
TargetRegisterClass *RC) { int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - assert(SS >= LowSpillSlot && "Unexpected low spill slot"); - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); ++NumSpillSlots; return SS; } @@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -int VirtRegMap::assignVirtReMatId(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = ReMatId; - return ReMatId++; -} - -void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = id; -} - -int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { - std::map<const TargetRegisterClass*, int>::iterator I = - EmergencySpillSlots.find(RC); - if (I != EmergencySpillSlots.end()) - return I->second; - return EmergencySpillSlots[RC] = createSpillSlot(RC); -} - -void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { - if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { - // If FI < LowSpillSlot, this stack reference was produced by - // instruction selection and is not a spill - if (FI >= LowSpillSlot) { - assert(FI >= 0 && "Spill slot index should not be negative!"); - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); - } - } -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, - MachineInstr *NewMI, ModRef MRInfo) { - // Move previous memory references folded to new instruction. - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); - for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), - E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { - MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); - MI2VirtMap.erase(I++); - } - - // add new memory reference - MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); - MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isFI()) - continue; - int FI = MO.getIndex(); - if (MF->getFrameInfo()->isFixedObjectIndex(FI)) - continue; - // This stack reference was produced by instruction selection and - // is not a spill - if (FI < LowSpillSlot) - continue; - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); - } - MI2VirtMap.erase(MI); - SpillPt2VirtMap.erase(MI); - RestorePt2VirtMap.erase(MI); - EmergencySpillMap.erase(MI); -} - -/// FindUnusedRegisters - Gather a list of allocatable registers that -/// have not been allocated to any virtual register. 
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { - unsigned NumRegs = TRI->getNumRegs(); - UnusedRegs.reset(); - UnusedRegs.resize(NumRegs); - - BitVector Used(NumRegs); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[Reg]); - } - - BitVector Allocatable = TRI->getAllocatableSet(*MF); - bool AnyUnused = false; - for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { - if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { - bool ReallyUnused = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - if (Used[*AS] || LIs->hasInterval(*AS)) { - ReallyUnused = false; - break; - } - } - if (ReallyUnused) { - AnyUnused = true; - UnusedRegs.set(Reg); - } - } - } - - return AnyUnused; -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " @@ -332,7 +180,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); if (Indexes) Indexes->removeMachineInstrFromMaps(MI); // It's safe to erase MI because MII has already been incremented. diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 03abff3..8cac311 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,22 +18,14 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include <map> namespace llvm { - class LiveIntervals; class MachineInstr; class MachineFunction; class MachineRegisterInfo; class TargetInstrInfo; - class TargetRegisterInfo; class raw_ostream; class SlotIndexes; @@ -45,18 +37,12 @@ namespace llvm { MAX_STACK_SLOT = (1L << 18)-1 }; - enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; - typedef std::multimap<MachineInstr*, - std::pair<unsigned, ModRef> > MI2VirtMapTy; - private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; - DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs; - /// Virt2PhysMap - This is a virtual to physical register /// mapping. Each virtual register is required to have an entry in /// it; even spilled virtual registers (the register mapped to a @@ -70,71 +56,10 @@ namespace llvm { /// at. IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap; - /// Virt2ReMatIdMap - This is virtual register to rematerialization id - /// mapping. Each spilled virtual register that should be remat'd has an - /// entry in it which corresponds to the remat id. - IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap; - /// Virt2SplitMap - This is virtual register to splitted virtual register /// mapping. IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap; - /// Virt2SplitKillMap - This is splitted virtual register to its last use - /// (kill) index mapping. - IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap; - - /// ReMatMap - This is virtual register to re-materialized instruction - /// mapping. Each virtual register whose definition is going to be - /// re-materialized has an entry in it. 
- IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap; - - /// MI2VirtMap - This is MachineInstr to virtual register - /// mapping. In the case of memory spill code being folded into - /// instructions, we need to know which virtual register was - /// read/written by this instruction. - MI2VirtMapTy MI2VirtMap; - - /// SpillPt2VirtMap - This records the virtual registers which should - /// be spilled right after the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > > - SpillPt2VirtMap; - - /// RestorePt2VirtMap - This records the virtual registers which should - /// be restored right before the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap; - - /// EmergencySpillMap - This records the physical registers that should - /// be spilled / restored around the MachineInstr since the register - /// allocator has run out of registers. - std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap; - - /// EmergencySpillSlots - This records emergency spill slots used to - /// spill physical registers when the register allocator runs out of - /// registers. Ideally only one stack slot is used per function per - /// register class. - std::map<const TargetRegisterClass*, int> EmergencySpillSlots; - - /// ReMatId - Instead of assigning a stack slot to a to be rematerialized - /// virtual register, an unique id is being assigned. This keeps track of - /// the highest id used so far. Note, this starts at (1<<18) to avoid - /// conflicts with stack slot numbers. - int ReMatId; - - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. - int LowSpillSlot, HighSpillSlot; - - /// SpillSlotToUsesMap - Records uses for each register spill slot. - SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap; - - /// ImplicitDefed - One bit for each virtual register. If set it indicates - /// the register is implicitly defined. - BitVector ImplicitDefed; - - /// UnusedRegs - A list of physical registers that have not been used. - BitVector UnusedRegs; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -144,11 +69,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), - Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), - ReMatId(MAX_STACK_SLOT+1), - LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -235,8 +156,7 @@ namespace llvm { /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT && - getReMatId(virtReg) == NO_STACK_SLOT) + if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. 
@@ -250,13 +170,6 @@ namespace llvm { return Virt2StackSlotMap[virtReg]; } - /// @brief returns the rematerialization id mapped to the specified virtual - /// register - int getReMatId(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2ReMatIdMap[virtReg]; - } - /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); @@ -264,250 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - int assignVirtReMatId(unsigned virtReg); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - void assignVirtReMatId(unsigned virtReg, int id); - - /// @brief returns true if the specified virtual register is being - /// re-materialized. - bool isReMaterialized(unsigned virtReg) const { - return ReMatMap[virtReg] != NULL; - } - - /// @brief returns the original machine instruction being re-issued - /// to re-materialize the specified virtual register. - MachineInstr *getReMaterializedMI(unsigned virtReg) const { - return ReMatMap[virtReg]; - } - - /// @brief records the specified virtual register will be - /// re-materialized and the original instruction which will be re-issed - /// for this purpose. If parameter all is true, then all uses of the - /// registers are rematerialized and it's safe to delete the definition. - void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { - ReMatMap[virtReg] = def; - } - - /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, SlotIndex index) { - Virt2SplitKillMap[virtReg] = index; - } - - SlotIndex getKillPoint(unsigned virtReg) const { - return Virt2SplitKillMap[virtReg]; - } - - /// @brief remove the last use (kill) of a split virtual register. - void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = SlotIndex(); - } - - /// @brief returns true if the specified MachineInstr is a spill point. - bool isSpillPt(MachineInstr *Pt) const { - return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be spilled due to - /// splitting right after the specified MachineInstr. - std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) { - return SpillPt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a spill point for virtReg. - void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Pt); - if (I != SpillPt2VirtMap.end()) - I->second.push_back(std::make_pair(virtReg, isKill)); - else { - std::vector<std::pair<unsigned,bool> > Virts; - Virts.push_back(std::make_pair(virtReg, isKill)); - SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer spill point information from one instruction to - /// another. 
- void transferSpillPts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Old); - if (I == SpillPt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back().first; - bool isKill = I->second.back().second; - I->second.pop_back(); - addSpillPoint(virtReg, isKill, New); - } - SpillPt2VirtMap.erase(I); - } - - /// @brief returns true if the specified MachineInstr is a restore point. - bool isRestorePt(MachineInstr *Pt) const { - return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be restoreed due to - /// splitting right after the specified MachineInstr. - std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) { - return RestorePt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a restore point for virtReg. - void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Pt); - if (I != RestorePt2VirtMap.end()) - I->second.push_back(virtReg); - else { - std::vector<unsigned> Virts; - Virts.push_back(virtReg); - RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer restore point information from one instruction to - /// another. - void transferRestorePts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Old); - if (I == RestorePt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addRestorePoint(virtReg, New); - } - RestorePt2VirtMap.erase(I); - } - - /// @brief records that the specified physical register must be spilled - /// around the specified machine instr. - void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { - if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) - EmergencySpillMap[MI].push_back(PhysReg); - else { - std::vector<unsigned> PhysRegs; - PhysRegs.push_back(PhysReg); - EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); - } - } - - /// @brief returns true if one or more physical registers must be spilled - /// around the specified instruction. - bool hasEmergencySpills(MachineInstr *MI) const { - return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); - } - - /// @brief returns the physical registers to be spilled and restored around - /// the instruction. - std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) { - return EmergencySpillMap[MI]; - } - - /// @brief - transfer emergency spill information from one instruction to - /// another. - void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*,std::vector<unsigned> >::iterator I = - EmergencySpillMap.find(Old); - if (I == EmergencySpillMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addEmergencySpill(virtReg, New); - } - EmergencySpillMap.erase(I); - } - - /// @brief return or get a emergency spill slot for the register class. - int getEmergencySpillSlot(const TargetRegisterClass *RC); - - /// @brief Return lowest spill slot index. - int getLowSpillSlot() const { - return LowSpillSlot; - } - - /// @brief Return highest spill slot index. - int getHighSpillSlot() const { - return HighSpillSlot; - } - - /// @brief Records a spill slot use. 
- void addSpillSlotUse(int FrameIndex, MachineInstr *MI); - - /// @brief Returns true if spill slot has been used. - bool isSpillSlotUsed(int FrameIndex) const { - assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); - return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); - } - - /// @brief Mark the specified register as being implicitly defined. - void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - - /// @brief Returns true if the virtual register is implicitly defined. - bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; - } - - /// @brief Updates information about the specified virtual register's value - /// folded into newMI machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, - ModRef MRInfo); - - /// @brief Updates information about the specified virtual register's value - /// folded into the specified machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); - - /// @brief returns the virtual registers' values folded in memory - /// operands of this instruction - std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator> - getFoldedVirts(MachineInstr* MI) const { - return MI2VirtMap.equal_range(MI); - } - - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the - /// the folded instruction map and spill point map. - void RemoveMachineInstrFromMaps(MachineInstr *MI); - - /// FindUnusedRegisters - Gather a list of allocatable registers that - /// have not been allocated to any virtual register. - bool FindUnusedRegisters(LiveIntervals* LIs); - - /// HasUnusedRegisters - Return true if there are any allocatable registers - /// that have not been allocated to any virtual register. - bool HasUnusedRegisters() const { - return !UnusedRegs.none(); - } - - /// setRegisterUsed - Remember the physical register is now used. - void setRegisterUsed(unsigned Reg) { - UnusedRegs.reset(Reg); - } - - /// isRegisterUnused - Return true if the physical register has not been - /// used. - bool isRegisterUnused(unsigned Reg) const { - return UnusedRegs[Reg]; - } - - /// getFirstUnusedRegister - Return the first physical register that has not - /// been used. - unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { - int Reg = UnusedRegs.find_first(); - while (Reg != -1) { - if (allocatableRCRegs[RC][Reg]) - return (unsigned)Reg; - Reg = UnusedRegs.find_next(Reg); - } - return 0; - } - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp deleted file mode 100644 index a5ec797..0000000 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ /dev/null @@ -1,2633 +0,0 @@ -//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "virtregrewriter" -#include "VirtRegRewriter.h" -#include "VirtRegMap.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumDSE , "Number of dead stores elided"); -STATISTIC(NumDSS , "Number of dead spill slots removed"); -STATISTIC(NumCommutes, "Number of instructions commuted"); -STATISTIC(NumDRM , "Number of re-materializable defs elided"); -STATISTIC(NumStores , "Number of stores added"); -STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omitted"); -STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); -STATISTIC(NumCopified, "Number of available reloads turned into copies"); -STATISTIC(NumReMats , "Number of re-materialization"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumReused , "Number of values reused"); -STATISTIC(NumDCE , "Number of copies elided"); -STATISTIC(NumSUnfold , "Number of stores unfolded"); -STATISTIC(NumModRefUnfold, "Number of modref unfolded"); - -namespace { - enum RewriterName { local, trivial }; -} - -static cl::opt<RewriterName> -RewriterOpt("rewriter", - cl::desc("Rewriter to use (default=local)"), - cl::Prefix, - cl::values(clEnumVal(local, "local rewriter"), - clEnumVal(trivial, "trivial rewriter"), - clEnumValEnd), - cl::init(local)); - -static cl::opt<bool> -ScheduleSpills("schedule-spills", - cl::desc("Schedule spill code"), - cl::init(false)); - -VirtRegRewriter::~VirtRegRewriter() {} - -/// substitutePhysReg - Replace virtual register in MachineOperand with a -/// physical register. Do the right thing with the sub-register index. -/// Note that operands may be added, so the MO reference is no longer valid. -static void substitutePhysReg(MachineOperand &MO, unsigned Reg, - const TargetRegisterInfo &TRI) { - if (MO.getSubReg()) { - MO.substPhysReg(Reg, TRI); - - // Any kill flags apply to the full virtual register, so they also apply to - // the full physical register. - // We assume that partial defs have already been decorated with a super-reg - // <imp-def> operand by LiveIntervals. - MachineInstr &MI = *MO.getParent(); - if (MO.isUse() && !MO.isUndef() && - (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) - MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); - } else { - MO.setReg(Reg); - } -} - -namespace { - -/// This class is intended for use with the new spilling framework only. It -/// rewrites vreg def/uses to use the assigned preg, but does not insert any -/// spill code. -struct TrivialRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); - DEBUG(dbgs() << "**** Machine Instrs" - << "(NOTE! Does not include spills and reloads!) 
****\n"); - DEBUG(MF.dump()); - - MachineRegisterInfo *mri = &MF.getRegInfo(); - const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); - - bool changed = false; - - for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = li->reg; - - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - if (!li->empty()) - mri->setPhysRegUsed(reg); - } - else { - if (!VRM.hasPhys(reg)) - continue; - unsigned pReg = VRM.getPhys(reg); - mri->setPhysRegUsed(pReg); - // Copy the register use-list before traversing it. - SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), - E = mri->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - for (unsigned N=0; N != reglist.size(); ++N) - substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), - pReg, *tri); - changed |= !reglist.empty(); - } - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); - - return changed; - } - -}; - -} - -// ************************************************************************ // - -namespace { - -/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB -/// from top down, keep track of which spill slots or remat are available in -/// each register. -/// -/// Note that not all physregs are created equal here. In particular, some -/// physregs are reloads that we are allowed to clobber or ignore at any time. -/// Other physregs are values that the register allocated program is using -/// that we cannot CHANGE, but we can read if we like. We keep track of this -/// on a per-stack-slot / remat id basis as the low bit in the value of the -/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks -/// this bit and addAvailable sets it if. -class AvailableSpills { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled - // or remat'ed virtual register values that are still available, due to - // being loaded or stored to, but not invalidated yet. - std::map<int, unsigned> SpillSlotsOrReMatsAvailable; - - // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable, - // indicating which stack slot values are currently held by a physreg. This - // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a - // physreg is modified. - std::multimap<unsigned, int> PhysRegsAvailable; - - void disallowClobberPhysRegOnly(unsigned PhysReg); - - void ClobberPhysRegOnly(unsigned PhysReg); -public: - AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) - : TRI(tri), TII(tii) { - } - - /// clear - Reset the state. - void clear() { - SpillSlotsOrReMatsAvailable.clear(); - PhysRegsAvailable.clear(); - } - - const TargetRegisterInfo *getRegInfo() const { return TRI; } - - /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is - /// available in a physical register, return that PhysReg, otherwise - /// return 0. - unsigned getSpillSlotOrReMatPhysReg(int Slot) const { - std::map<int, unsigned>::const_iterator I = - SpillSlotsOrReMatsAvailable.find(Slot); - if (I != SpillSlotsOrReMatsAvailable.end()) { - return I->second >> 1; // Remove the CanClobber bit. - } - return 0; - } - - /// addAvailable - Mark that the specified stack slot / remat is available - /// in the specified physreg. 
If CanClobber is true, the physreg can be - /// modified at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { - // If this stack slot is thought to be available in some other physreg, - // remove its record. - ModifyStackSlotOrReMat(SlotOrReMat); - - PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat)); - SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) | - (unsigned)CanClobber; - - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Remembering RM#" - << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); - DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) - << (CanClobber ? " canclobber" : "") << "\n"); - } - - /// canClobberPhysRegForSS - Return true if the spiller is allowed to change - /// the value of the specified stackslot register if it desires. The - /// specified stack slot must be available in a physreg for this query to - /// make sense. - bool canClobberPhysRegForSS(int SlotOrReMat) const { - assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) && - "Value not available!"); - return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1; - } - - /// canClobberPhysReg - Return true if the spiller is allowed to clobber the - /// physical register where values for some stack slot(s) might be - /// available. - bool canClobberPhysReg(unsigned PhysReg) const { - std::multimap<unsigned, int>::const_iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - if (!canClobberPhysRegForSS(SlotOrReMat)) - return false; - } - return true; - } - - /// disallowClobberPhysReg - Unset the CanClobber bit of the specified - /// stackslot register. The register is still available but is no longer - /// allowed to be modifed. - void disallowClobberPhysReg(unsigned PhysReg); - - /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff that lives in - /// it and any of its aliases. - void ClobberPhysReg(unsigned PhysReg); - - /// ModifyStackSlotOrReMat - This method is called when the value in a stack - /// slot changes. This removes information about which register the - /// previous value for this slot lives in (as the previous value is dead - /// now). - void ModifyStackSlotOrReMat(int SlotOrReMat); - - /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, - /// other stack slots sharing the same register are no longer valid. - void ClobberSharingStackSlots(int StackSlot); - - /// AddAvailableRegsToLiveIn - Availability information is being kept coming - /// into the specified MBB. Add available physical registers as potential - /// live-in's. If they are reused in the MBB, they will be added to the - /// live-in set to make register scavenger and post-allocation scheduler. - void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; - -} - -// ************************************************************************ // - -// Given a location where a reload of a spilled register or a remat of -// a constant is to be inserted, attempt to find a safe location to -// insert the load at an earlier point in the basic-block, to hide -// latency of the load and to avoid address-generation interlock -// issues. 
-static MachineBasicBlock::iterator -ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, - MachineBasicBlock::iterator const Begin, - unsigned PhysReg, - const TargetRegisterInfo *TRI, - bool DoReMat, - int SSorRMId, - const TargetInstrInfo *TII, - const MachineFunction &MF) -{ - if (!ScheduleSpills) - return InsertLoc; - - // Spill backscheduling is of primary interest to addresses, so - // don't do anything if the register isn't in the register class - // used for pointers. - - const TargetLowering *TL = MF.getTarget().getTargetLowering(); - - if (!TL->isTypeLegal(TL->getPointerTy())) - // Believe it or not, this is true on 16-bit targets like PIC16. - return InsertLoc; - - const TargetRegisterClass *ptrRegClass = - TL->getRegClassFor(TL->getPointerTy()); - if (!ptrRegClass->contains(PhysReg)) - return InsertLoc; - - // Scan upwards through the preceding instructions. If an instruction doesn't - // reference the stack slot or the register we're loading, we can - // backschedule the reload up past it. - MachineBasicBlock::iterator NewInsertLoc = InsertLoc; - while (NewInsertLoc != Begin) { - MachineBasicBlock::iterator Prev = prior(NewInsertLoc); - for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { - MachineOperand &Op = Prev->getOperand(i); - if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) - goto stop; - } - if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || - Prev->findRegisterDefOperand(PhysReg)) - goto stop; - for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) - if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || - Prev->findRegisterDefOperand(*Alias)) - goto stop; - NewInsertLoc = Prev; - } -stop:; - - // If we made it to the beginning of the block, turn around and move back - // down just past any existing reloads. They're likely to be reloads/remats - // for instructions earlier than what our current reload/remat is for, so - // they should be scheduled earlier. - if (NewInsertLoc == Begin) { - int FrameIdx; - while (InsertLoc != NewInsertLoc && - (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || - TII->isTriviallyReMaterializable(NewInsertLoc))) - ++NewInsertLoc; - } - - return NewInsertLoc; -} - -namespace { - -// ReusedOp - For each reused operand, we keep track of a bit of information, -// in case we need to rollback upon processing a new operand. See comments -// below. -struct ReusedOp { - // The MachineInstr operand that reused an available value. - unsigned Operand; - - // StackSlotOrReMat - The spill slot or remat id of the value being reused. - unsigned StackSlotOrReMat; - - // PhysRegReused - The physical register the value was available in. - unsigned PhysRegReused; - - // AssignedPhysReg - The physreg that was assigned for use by the reload. - unsigned AssignedPhysReg; - - // VirtReg - The virtual register itself. - unsigned VirtReg; - - ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, - unsigned vreg) - : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), - AssignedPhysReg(apr), VirtReg(vreg) {} -}; - -/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that -/// is reused instead of reloaded. 
-class ReuseInfo { - MachineInstr &MI; - std::vector<ReusedOp> Reuses; - BitVector PhysRegsClobbered; -public: - ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { - PhysRegsClobbered.resize(tri->getNumRegs()); - } - - bool hasReuses() const { - return !Reuses.empty(); - } - - /// addReuse - If we choose to reuse a virtual register that is already - /// available instead of reloading it, remember that we did so. - void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, - unsigned PhysRegReused, unsigned AssignedPhysReg, - unsigned VirtReg) { - // If the reload is to the assigned register anyway, no undo will be - // required. - if (PhysRegReused == AssignedPhysReg) return; - - // Otherwise, remember this. - Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, - AssignedPhysReg, VirtReg)); - } - - void markClobbered(unsigned PhysReg) { - PhysRegsClobbered.set(PhysReg); - } - - bool isClobbered(unsigned PhysReg) const { - return PhysRegsClobbered.test(PhysReg); - } - - /// GetRegForReload - We are about to emit a reload into PhysReg. If there - /// is some other operand that is using the specified register, either pick - /// a new register to use, or evict the previous reload and use this reg. - unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, - MachineFunction &MF, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM); - - /// GetRegForReload - Helper for the above GetRegForReload(). Add a - /// 'Rejected' set to remember which registers have been considered and - /// rejected for the reload. This avoids infinite looping in case like - /// this: - /// t1 := op t2, t3 - /// t2 <- assigned r0 for use by the reload but ended up reuse r1 - /// t3 <- assigned r1 for use by the reload but ended up reuse r0 - /// t1 <- desires r1 - /// sees r1 is taken by t2, tries t2's reload register r0 - /// sees r0 is taken by t3, tries t3's reload register r1 - /// sees r1 is taken by t2, tries t2's reload register r0 ... - unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - SmallSet<unsigned, 8> Rejected; - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); - return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } -}; - -} - -// ****************** // -// Utility Functions // -// ****************** // - -/// findSinglePredSuccessor - Return via reference a vector of machine basic -/// blocks each of which is a successor of the specified BB and has no other -/// predecessor. -static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl<MachineBasicBlock *> &Succs){ - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->pred_size() == 1) - Succs.push_back(SuccMBB); - } -} - -/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed -/// but not re-defined and it's being reused. Remove the kill flag for the -/// register and unset the kill's marker and last kill operand. 
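The Rejected set in the second GetRegForReload overload above is what makes the r0/r1 ping-pong in its comment terminate: every time a reload is redirected to another reuse's assigned register, the register just given up is added to the set and is never proposed again. The sketch below shows only that termination property with toy reuse records and made-up register numbers; it leaves out the undo-a-reuse path the real routine falls back to. The ResurrectConfirmedKill helper described by the preceding comment follows right after it.

#include <iostream>
#include <set>
#include <vector>

// Toy reuse record: a reload that wanted AssignedReg but is currently
// reusing ReusedReg instead.
struct ToyReuse {
  unsigned ReusedReg;
  unsigned AssignedReg;
};

// Pick a register for a new reload that wants PhysReg. If an earlier reuse
// already sits in PhysReg, try that reuse's originally assigned register
// instead, but only if it has not been rejected yet; the Rejected set is
// what keeps the r0/r1 cycle from looping forever.
static unsigned getRegForReload(unsigned PhysReg,
                                const std::vector<ToyReuse> &Reuses,
                                std::set<unsigned> &Rejected) {
  for (size_t i = 0; i != Reuses.size(); ++i)
    if (Reuses[i].ReusedReg == PhysReg &&
        !Rejected.count(Reuses[i].AssignedReg)) {
      Rejected.insert(PhysReg);
      return getRegForReload(Reuses[i].AssignedReg, Reuses, Rejected);
    }
  return PhysReg;   // nothing conflicting, or everything else already rejected
}

int main() {
  // t2 was assigned r0 but ended up reusing r1; t3 was assigned r1 but
  // ended up reusing r0: the looping example from the comment above.
  ToyReuse T2 = { /*ReusedReg=*/1, /*AssignedReg=*/0 };
  ToyReuse T3 = { /*ReusedReg=*/0, /*AssignedReg=*/1 };
  std::vector<ToyReuse> Reuses;
  Reuses.push_back(T2);
  Reuses.push_back(T3);
  std::set<unsigned> Rejected;
  std::cout << "reload of r1 resolved to r"
            << getRegForReload(1, Reuses, Rejected) << "\n";  // terminates
  return 0;
}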
-static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n"); - - MachineOperand *KillOp = KillOps[Reg]; - KillOp->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOp->getReg(); - if (!RegKills[KReg]) - return; - - assert(KillOps[KReg]->getParent() == KillOp->getParent() && - "invalid superreg kill flags"); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // If it's a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - - assert(KillOps[*SR]->getParent() == KillOp->getParent() && - "invalid subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } -} - -/// ResurrectKill - Invalidate kill info associated with a previous MI. An -/// optimization may have decided that it's safe to reuse a previously killed -/// register. If we fail to erase the invalid kill flags, then the register -/// scavenger may later clobber the register used by this MI. Note that this -/// must be done even if this MI is being deleted! Consider: -/// -/// USE $r1 (vreg1) <kill> -/// ... -/// $r1(vreg3) = COPY $r1 (vreg2) -/// -/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially -/// vreg1's only use is a kill. The rewriter doesn't know it should be live -/// until it rewrites vreg2. At that points it sees that the copy is dead and -/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 -/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at -/// vreg1 before deleting the copy. -static void ResurrectKill(MachineInstr &MI, unsigned Reg, - const TargetRegisterInfo* TRI, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - return; - } - // No previous kill for this reg. Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... - // = s8<kill> - // ... - // = d4 <avoiding reload> - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) - ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); - } -} - -/// InvalidateKills - MI is going to be deleted. If any of its operands are -/// marked kill, then invalidate the information. -static void InvalidateKills(MachineInstr &MI, - const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - SmallVector<unsigned, 2> *KillRegs = NULL) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (KillRegs) - KillRegs->push_back(Reg); - assert(Reg < KillOps.size()); - if (KillOps[Reg] == &MO) { - // This operand was the kill, now no longer. - KillOps[Reg] = NULL; - RegKills.reset(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - if (RegKills[*SR]) { - assert(KillOps[*SR] == &MO && "bad subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } - } - else { - // This operand may have reused a previously killed reg. 
Keep it live in - // case it continues to be used after erasing this instruction. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - } - } -} - -/// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since its spill instruction is removed), mark it isDead. Also checks if -/// the def MI has other definition operands that are not dead. Returns it by -/// reference. -static bool InvalidateRegDef(MachineBasicBlock::iterator I, - MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef, - const TargetRegisterInfo *TRI) { - // Due to remat, it's possible this reg isn't being reused. That is, - // the def of this reg (by prev MI) is now dead. - MachineInstr *DefMI = I; - MachineOperand *DefOp = NULL; - for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) - continue; - if (MO.getReg() == Reg) - DefOp = &MO; - else if (!MO.isDead()) - HasLiveDef = true; - } - if (!DefOp) - return false; - - bool FoundUse = false, Done = false; - MachineBasicBlock::iterator E = &NewDef; - ++I; ++E; - for (; !Done && I != E; ++I) { - MachineInstr *NMI = I; - for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() == 0 || - (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) - continue; - if (MO.isUse()) - FoundUse = true; - Done = true; // Stop after scanning all the operands of this MI. - } - } - if (!FoundUse) { - // Def is dead! - DefOp->setIsDead(); - return true; - } - return false; -} - -/// UpdateKills - Track and update kill info. If a MI reads a register that is -/// marked kill, then it must be due to register reuse. Transfer the kill info -/// over. -static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - // These do not affect kill info at all. - if (MI.isDebugValue()) - return; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // This operand may have reused a previously killed reg. Keep it live. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - - if (MO.isKill()) { - RegKills.set(Reg); - KillOps[Reg] = &MO; - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.set(*SR); - KillOps[*SR] = &MO; - } - } - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - RegKills.reset(Reg); - KillOps[Reg] = NULL; - // It also defines (or partially define) aliases. - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - } -} - -/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. 
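ResurrectKill, InvalidateKills and UpdateKills above all manipulate the same pair of side structures: a RegKills bit vector recording which physregs were last seen with a kill flag, and a parallel KillOps array remembering which operand carries that flag so it can be cleared when the register turns out to be reused. The sketch below models just that pairing with toy types (an int standing in for the operand's kill bit) and ignores the sub- and super-register handling; the ReMaterialize helper named by the preceding comment follows right after it.

#include <cassert>
#include <vector>

// Toy kill bookkeeping mirroring the RegKills/KillOps pair: RegKills[R] says
// "the last reference to physreg R was marked <kill>", and KillOps[R]
// remembers where that flag lives so it can be unset later.
struct ToyKillInfo {
  std::vector<bool> RegKills;
  std::vector<int *> KillOps;   // stand-in for MachineOperand*: flag storage

  explicit ToyKillInfo(unsigned NumRegs)
      : RegKills(NumRegs, false), KillOps(NumRegs, nullptr) {}

  // A later instruction killed Reg via the flag stored at *Flag.
  void recordKill(unsigned Reg, int *Flag) {
    *Flag = 1;
    RegKills[Reg] = true;
    KillOps[Reg] = Flag;
  }

  // The register turned out to still be live (it is being reused): clear the
  // stale kill flag, as ResurrectKill / ResurrectConfirmedKill do above.
  void resurrect(unsigned Reg) {
    if (!RegKills[Reg]) return;
    *KillOps[Reg] = 0;          // unset the <kill> marker on the old operand
    KillOps[Reg] = nullptr;
    RegKills[Reg] = false;
  }
};

int main() {
  ToyKillInfo KI(8);
  int KillFlagOnOldUse = 0;     // pretend this is a MachineOperand kill bit
  KI.recordKill(/*Reg=*/3, &KillFlagOnOldUse);
  assert(KillFlagOnOldUse == 1 && KI.RegKills[3]);
  KI.resurrect(3);              // a reload was avoided; r3 is live again
  assert(KillFlagOnOldUse == 0 && !KI.RegKills[3]);
  return 0;
}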
-/// -static void ReMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned DestReg, unsigned Reg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); -#ifndef NDEBUG - const MCInstrDesc &MCID = ReMatDefMI->getDesc(); - assert(MCID.getNumDefs() == 1 && - "Don't know how to remat instructions that define > 1 values!"); -#endif - TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); - MachineInstr *NewMI = prior(MII); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) - continue; - assert(MO.isUse()); - unsigned Phys = VRM.getPhys(VirtReg); - assert(Phys && "Virtual register is not assigned a register?"); - substitutePhysReg(MO, Phys, *TRI); - } - ++NumReMats; -} - -/// findSuperReg - Find the SubReg's super-register of given register class -/// where its SubIdx sub-register is SubReg. -static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, - unsigned SubIdx, const TargetRegisterInfo *TRI) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - unsigned Reg = *I; - if (TRI->getSubReg(Reg, SubIdx) == SubReg) - return Reg; - } - return 0; -} - -// ******************************** // -// Available Spills Implementation // -// ******************************** // - -/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified -/// stackslot register. The register is still available but is no longer -/// allowed to be modifed. -void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"); - } -} - -/// disallowClobberPhysReg - Unset the CanClobber bit of the specified -/// stackslot register and its aliases. The register and its aliases may -/// still available but is no longer allowed to be modifed. -void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - disallowClobberPhysRegOnly(*AS); - disallowClobberPhysRegOnly(PhysReg); -} - -/// ClobberPhysRegOnly - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in it. 
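findSuperReg above is a plain linear search: walk the members of a register class and return the one whose sub-register at the given index is the register in hand. A standalone sketch with a made-up sub-register table and register numbering is below; the ClobberPhysRegOnly implementation described by the preceding comment follows right after it.

#include <iostream>
#include <map>
#include <utility>
#include <vector>

// Toy sub-register table: (SuperReg, SubIdx) -> SubReg. The numbering is
// invented here; real targets describe this in their register info tables.
typedef std::map<std::pair<unsigned, unsigned>, unsigned> ToySubRegTable;

// Same search as findSuperReg: scan the register class for the member whose
// SubIdx sub-register is SubReg, or return 0 if there is none.
static unsigned findSuperReg(const std::vector<unsigned> &RegClass,
                             unsigned SubReg, unsigned SubIdx,
                             const ToySubRegTable &SubRegs) {
  for (size_t i = 0; i != RegClass.size(); ++i) {
    ToySubRegTable::const_iterator It =
        SubRegs.find(std::make_pair(RegClass[i], SubIdx));
    if (It != SubRegs.end() && It->second == SubReg)
      return RegClass[i];
  }
  return 0;
}

int main() {
  // Pretend regs 10 and 11 form a class, and sub-index 1 of reg 11 is reg 4.
  ToySubRegTable SubRegs;
  SubRegs[std::make_pair(10u, 1u)] = 3;
  SubRegs[std::make_pair(11u, 1u)] = 4;
  std::vector<unsigned> RC;
  RC.push_back(10);
  RC.push_back(11);
  std::cout << findSuperReg(RC, /*SubReg=*/4, /*SubIdx=*/1, SubRegs) << "\n"; // 11
  return 0;
}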
-void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - PhysRegsAvailable.erase(I++); - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "); - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); - else - DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); - } -} - -/// ClobberPhysReg - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in -/// it and any of its aliases. -void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - ClobberPhysRegOnly(*AS); - ClobberPhysRegOnly(PhysReg); -} - -/// AddAvailableRegsToLiveIn - Availability information is being kept coming -/// into the specified MBB. Add available physical registers as potential -/// live-in's. If they are reused in the MBB, they will be added to the -/// live-in set to make register scavenger and post-allocation scheduler. -void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - std::set<unsigned> NotAvailable; - for (std::multimap<unsigned, int>::iterator - I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); - I != E; ++I) { - unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); - // FIXME: A temporary workaround. We can't reuse available value if it's - // not safe to move the def of the virtual register's class. e.g. - // X86::RFP* register classes. Do not add it as a live-in. - if (!TII->isSafeToMoveRegClassDefs(RC)) - // This is no longer available. - NotAvailable.insert(Reg); - else { - MBB.addLiveIn(Reg); - if (RegKills[Reg]) - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - } - - // Skip over the same register. - std::multimap<unsigned, int>::iterator NI = llvm::next(I); - while (NI != E && NI->first == Reg) { - ++I; - ++NI; - } - } - - for (std::set<unsigned>::iterator I = NotAvailable.begin(), - E = NotAvailable.end(); I != E; ++I) { - ClobberPhysReg(*I); - for (const unsigned *SubRegs = TRI->getSubRegisters(*I); - *SubRegs; ++SubRegs) - ClobberPhysReg(*SubRegs); - } -} - -/// ModifyStackSlotOrReMat - This method is called when the value in a stack -/// slot changes. This removes information about which register the previous -/// value for this slot lives in (as the previous value is dead now). -void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(SlotOrReMat); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - SpillSlotsOrReMatsAvailable.erase(It); - - // This register may hold the value of multiple stack slots, only remove this - // stack slot from the set of values the register contains. 
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - for (; ; ++I) { - assert(I != PhysRegsAvailable.end() && I->first == Reg && - "Map inverse broken!"); - if (I->second == SlotOrReMat) break; - } - PhysRegsAvailable.erase(I); -} - -void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(StackSlot); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - - // Erase entries in PhysRegsAvailable for other stack slots. - std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - while (I != PhysRegsAvailable.end() && I->first == Reg) { - std::multimap<unsigned, int>::iterator NextI = llvm::next(I); - if (I->second != StackSlot) { - DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " - << PrintReg(Reg, TRI) << '\n'); - SpillSlotsOrReMatsAvailable.erase(I->second); - PhysRegsAvailable.erase(I); - } - I = NextI; - } -} - -// ************************** // -// Reuse Info Implementation // -// ************************** // - -/// GetRegForReload - We are about to emit a reload into PhysReg. If there -/// is some other operand that is using the specified register, either pick -/// a new register to use, or evict the previous reload and use this reg. -unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, - unsigned PhysReg, - MachineFunction &MF, - MachineInstr *MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - - if (Reuses.empty()) return PhysReg; // This is most often empty. - - for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { - ReusedOp &Op = Reuses[ro]; - // If we find some other reuse that was supposed to use this register - // exactly for its reload, we can change this reload to use ITS reload - // register. That is, unless its reload register has already been - // considered and subsequently rejected because it has also been reused - // by another operand. - if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0 && - RC->contains(Op.AssignedPhysReg)) { - // Yup, use the reload register that we didn't use before. - unsigned NewReg = Op.AssignedPhysReg; - Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } else { - // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload - // and use this one. - unsigned PRRU = Op.PhysRegReused; - if (TRI->regsOverlap(PRRU, PhysReg)) { - // Okay, we found out that an alias of a reused register - // was used. This isn't good because it means we have - // to undo a previous reuse. - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *AliasRC = - MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); - - // Copy Op out of the vector and remove it, we're going to insert an - // explicit load for it. - ReusedOp NewOp = Op; - Reuses.erase(Reuses.begin()+ro); - - // MI may be using only a sub-register of PhysRegUsed. 
- unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); - unsigned SubIdx = 0; - assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && - "A reuse cannot be a virtual register"); - if (PRRU != RealPhysRegUsed) { - // What was the sub-register index? - SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); - assert(SubIdx && - "Operand physreg is not a sub-register of PhysRegUsed"); - } - - // Ok, we're going to try to reload the assigned physreg into the - // slot that we were supposed to in the first place. However, that - // register could hold a reuse. Check to see if it conflicts or - // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, - MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; - int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, - DoReMat, SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, - TRI, VRM); - } else { - TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); - // Any stores to this stack slot are not dead anymore. - MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; - ++NumLoads; - } - Spills.ClobberPhysReg(NewPhysReg); - Spills.ClobberPhysReg(NewOp.PhysRegReused); - - unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; - MI->getOperand(NewOp.Operand).setReg(RReg); - MI->getOperand(NewOp.Operand).setSubReg(0); - - Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - - DEBUG(dbgs() << "Reuse undone!\n"); - --NumReused; - - // Finally, PhysReg is now available, go ahead and use it. - return PhysReg; - } - } - } - return PhysReg; -} - -// ************************************************************************ // - -/// FoldsStackSlotModRef - Return true if the specified MI folds the specified -/// stack slot mod/ref. It also checks if it's possible to unfold the -/// instruction by having it define a specified physical register instead. -static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) - return false; - - bool Found = false; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - if (MR & VirtRegMap::isModRef) - if (VRM.getStackSlot(VirtReg) == SS) { - Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; - break; - } - } - if (!Found) - return false; - - // Does the instruction uses a register that overlaps the scratch register? 
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (!VRM.hasPhys(Reg)) - continue; - Reg = VRM.getPhys(Reg); - } - if (TRI->regsOverlap(PhysReg, Reg)) - return false; - } - return true; -} - -/// FindFreeRegister - Find a free register of a given register class by looking -/// at (at most) the last two machine instructions. -static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, - MachineBasicBlock &MBB, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - BitVector &AllocatableRegs) { - BitVector Defs(TRI->getNumRegs()); - BitVector Uses(TRI->getNumRegs()); - SmallVector<unsigned, 4> LocalUses; - SmallVector<unsigned, 4> Kills; - - // Take a look at 2 instructions at most. - unsigned Count = 0; - while (Count < 2) { - if (MII == MBB.begin()) - break; - MachineInstr *PrevMI = prior(MII); - MII = PrevMI; - - if (PrevMI->isDebugValue()) - continue; // Skip over dbg_value instructions. - ++Count; - - for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = PrevMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - Defs.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.set(*AS); - } else { - LocalUses.push_back(Reg); - if (MO.isKill() && AllocatableRegs[Reg]) - Kills.push_back(Reg); - } - } - - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned Kill = Kills[i]; - if (!Defs[Kill] && !Uses[Kill] && - RC->contains(Kill)) - return Kill; - } - for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { - unsigned Reg = LocalUses[i]; - Uses.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.set(*AS); - } - } - - return 0; -} - -static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, - const TargetRegisterInfo &TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == VirtReg) - substitutePhysReg(MO, PhysReg, TRI); - } -} - -namespace { - -struct RefSorter { - bool operator()(const std::pair<MachineInstr*, int> &A, - const std::pair<MachineInstr*, int> &B) { - return A.second < B.second; - } -}; - -// ***************************** // -// Local Spiller Implementation // -// ***************************** // - -class LocalRewriter : public VirtRegRewriter { - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - VirtRegMap *VRM; - LiveIntervals *LIs; - BitVector AllocatableRegs; - DenseMap<MachineInstr*, unsigned> DistanceMap; - DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; - - MachineBasicBlock *MBB; // Basic block currently being processed. 
- -public: - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs); - -private: - void EraseInstr(MachineInstr *MI) { - VRM->RemoveMachineInstrFromMaps(MI); - LIs->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } - - bool OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI); - - void SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - void TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertEmergencySpills(MachineInstr *MI); - - bool InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertSpills(MachineInstr *MI); - - void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps); - - void RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; -} - -bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* lis) { - MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - VRM = &vrm; - LIs = lis; - AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(dbgs() << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"); - DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" - " reloads!) ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Spills - Keep track of which spilled values are available in physregs - // so that we can choose to reuse the physregs instead of emitting - // reloads. This is usually refreshed per basic block. - AvailableSpills Spills(TRI, TII); - - // Keep track of kill information. - BitVector RegKills(TRI->getNumRegs()); - std::vector<MachineOperand*> KillOps; - KillOps.resize(TRI->getNumRegs(), NULL); - - // SingleEntrySuccs - Successor blocks which have a single predecessor. - SmallVector<MachineBasicBlock*, 4> SinglePredSuccs; - SmallPtrSet<MachineBasicBlock*,16> EarlyVisited; - - // Traverse the basic blocks depth first. - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - for (df_ext_iterator<MachineBasicBlock*, - SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MBB = *DFI; - if (!EarlyVisited.count(MBB)) - RewriteMBB(LIs, Spills, RegKills, KillOps); - - // If this MBB is the only predecessor of a successor. Keep the - // availability information and visit it next. 
- do { - // Keep visiting single predecessor successor as long as possible. - SinglePredSuccs.clear(); - findSinglePredSuccessor(MBB, SinglePredSuccs); - if (SinglePredSuccs.empty()) - MBB = 0; - else { - // FIXME: More than one successors, each of which has MBB has - // the only predecessor. - MBB = SinglePredSuccs[0]; - if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { - Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); - RewriteMBB(LIs, Spills, RegKills, KillOps); - } - } - } while (MBB); - - // Clear the availability info. - Spills.clear(); - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Mark unused spill slots. - MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) { - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { - SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; - if (!VRM->isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { - MachineInstr *DVMI = DbgValues[j]; - DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - EraseInstr(DVMI); - } - ++NumDSS; - } - DbgValues.clear(); - } - } - Slot2DbgValues.clear(); - - return true; -} - -/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if -/// a scratch register is available. -/// xorq %r12<kill>, %r13 -/// addq %rax, -184(%rbp) -/// addq %r13, -184(%rbp) -/// ==> -/// xorq %r12<kill>, %r13 -/// movq -184(%rbp), %r12 -/// addq %rax, %r12 -/// addq %r13, %r12 -/// movq %r12, -184(%rbp) -bool LocalRewriter:: -OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator NextMII = llvm::next(MII); - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - return false; - - if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) - return false; - - // Now let's see if the last couple of instructions happens to have freed up - // a register. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs); - if (!PhysReg) - return false; - - MachineFunction &MF = *MBB->getParent(); - TRI = MF.getTarget().getRegisterInfo(); - MachineInstr &MI = *MII; - if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // If the next instruction also folds the same SS modref and can be unfoled, - // then it's worthwhile to issue a load from SS into the free register and - // then unfold these instructions. - if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // Back-schedule reloads and remats. - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); - - // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); - // This invalidates Phys. - Spills.ClobberPhysReg(PhysReg); - // Remember it's available. - Spills.addAvailable(SS, PhysReg); - MaybeDeadStores[SS] = NULL; - - // Unfold current MI. 
- SmallVector<MachineInstr*, 4> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&MI, NewMIs[0]); - MII = MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - ++NumModRefUnfold; - - // Unfold next instructions that fold the same SS. - do { - MachineInstr &NextMI = *NextMII; - NextMII = llvm::next(NextMII); - NewMIs.clear(); - if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&NextMI, NewMIs[0]); - MBB->insert(NextMII, NewMIs[0]); - InvalidateKills(NextMI, TRI, RegKills, KillOps); - EraseInstr(&NextMI); - ++NumModRefUnfold; - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - break; - } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); - - // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(NextMII); - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - - return true; -} - -/// OptimizeByUnfold - Turn a store folding instruction into a load folding -/// instruction. e.g. -/// xorl %edi, %eax -/// movl %eax, -32(%ebp) -/// movl -36(%ebp), %eax -/// orl %eax, -32(%ebp) -/// ==> -/// xorl %edi, %eax -/// orl -36(%ebp), %eax -/// mov %eax, -32(%ebp) -/// This enables unfolding optimization for a subsequent instruction which will -/// also eliminate the newly introduced store instruction. -bool LocalRewriter:: -OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - MachineFunction &MF = *MBB->getParent(); - MachineInstr &MI = *MII; - unsigned UnfoldedOpc = 0; - unsigned UnfoldPR = 0; - unsigned UnfoldVR = 0; - int FoldedSS = VirtRegMap::NO_STACK_SLOT; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - // Only transform a MI that folds a single register. - if (UnfoldedOpc) - return false; - UnfoldVR = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; - if (VRM->isAssignedReg(UnfoldVR)) - continue; - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - FoldedSS = VRM->getStackSlot(UnfoldVR); - MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; - if (DeadStore && (MR & VirtRegMap::isModRef)) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); - if (!PhysReg || !DeadStore->readsRegister(PhysReg)) - continue; - UnfoldPR = PhysReg; - UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), - false, true); - } - } - - if (!UnfoldedOpc) { - if (!UnfoldVR) - return false; - - // Look for other unfolding opportunities. 
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, - RegKills, KillOps); - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) - continue; - if (VRM->isAssignedReg(VirtReg)) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - } else if (VRM->isReMaterialized(VirtReg)) - continue; - int SS = VRM->getStackSlot(VirtReg); - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - if (PhysReg) { - if (TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - continue; - } - if (VRM->hasPhys(VirtReg)) { - PhysReg = VRM->getPhys(VirtReg); - if (!TRI->regsOverlap(PhysReg, UnfoldPR)) - continue; - } - - // Ok, we'll need to reload the value into a register which makes - // it impossible to perform the store unfolding optimization later. - // Let's see if it is possible to fold the load if the store is - // unfolded. This allows us to perform the store unfolding - // optimization. - SmallVector<MachineInstr*, 4> NewMIs; - if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { - assert(NewMIs.size() == 1); - MachineInstr *NewMI = NewMIs.back(); - MBB->insert(MII, NewMI); - NewMIs.clear(); - int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); - assert(Idx != -1); - SmallVector<unsigned, 1> Ops; - Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); - NewMI->eraseFromParent(); - if (FoldedMI) { - VRM->addSpillSlotUse(SS, FoldedMI); - if (!VRM->hasPhys(UnfoldVR)) - VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = FoldedMI; - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - return true; - } - } - } - - return false; -} - -/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and -/// where SrcReg is r1 and it is tied to r0. Return true if after -/// commuting this instruction it will be r0 = op r2, r1. 
-static bool CommuteChangesDestination(MachineInstr *DefMI, - const MCInstrDesc &MCID, - unsigned SrcReg, - const TargetInstrInfo *TII, - unsigned &DstIdx) { - if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) - return false; - if (!DefMI->getOperand(1).isReg() || - DefMI->getOperand(1).getReg() != SrcReg) - return false; - unsigned DefIdx; - if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) - return false; - unsigned SrcIdx1, SrcIdx2; - if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) - return false; - if (SrcIdx1 == 1 && SrcIdx2 == 2) { - DstIdx = 2; - return true; - } - return false; -} - -/// CommuteToFoldReload - -/// Look for -/// r1 = load fi#1 -/// r1 = op r1, r2<kill> -/// store r1, fi#1 -/// -/// If op is commutable and r2 is killed, then we can xform these to -/// r2 = op r2, fi#1 -/// store r2, fi#1 -bool LocalRewriter:: -CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI) { - if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) - return false; - - MachineInstr &MI = *MII; - MachineBasicBlock::iterator DefMII = prior(MII); - MachineInstr *DefMI = DefMII; - const MCInstrDesc &MCID = DefMI->getDesc(); - unsigned NewDstIdx; - if (DefMII != MBB->begin() && - MCID.isCommutable() && - CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) - return false; - MachineInstr *ReloadMI = prior(DefMII); - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); - if (DestReg != SrcReg || FrameIdx != SS) - return false; - int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); - if (UseIdx == -1) - return false; - unsigned DefIdx; - if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) - return false; - assert(DefMI->getOperand(DefIdx).isReg() && - DefMI->getOperand(DefIdx).getReg() == SrcReg); - - // Now commute def instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); - if (!CommutedMI) - return false; - MBB->insert(MII, CommutedMI); - SmallVector<unsigned, 1> Ops; - Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); - // Not needed since foldMemoryOperand returns new MI. - CommutedMI->eraseFromParent(); - if (!FoldedMI) - return false; - - VRM->addSpillSlotUse(SS, FoldedMI); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - // Insert new def MI and spill MI. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); - MII = prior(MII); - MachineInstr *StoreMI = MII; - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = FoldedMI; // Update MII to backtrack. - - // Delete all 3 old instructions. - InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - EraseInstr(ReloadMI); - InvalidateKills(*DefMI, TRI, RegKills, KillOps); - EraseInstr(DefMI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - - // If NewReg was previously holding value of some SS, it's now clobbered. - // This has to be done now because it's a physical register. When this - // instruction is re-visited, it's ignored. 
- Spills.ClobberPhysReg(NewReg); - - ++NumCommutes; - return true; - } - - return false; -} - -/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if -/// the last store to the same slot is now dead. If so, remove the last store. -void LocalRewriter:: -SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, - TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - - // If there is a dead store to this stack slot, nuke it now. - if (LastStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); - ++NumDSE; - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB->begin(); - if (CheckDef) - --PrevMII; - EraseInstr(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side effects. - EraseInstr(DeadDef); - ++NumDRM; - } - } - } - } - } - - // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume - // the last of multiple instructions is the actual store. - LastStore = prior(oldNextMII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. - Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, PhysReg, isAvailable); - ++NumStores; -} - -/// isSafeToDelete - Return true if this instruction doesn't produce any side -/// effect and all of its defs are dead. -static bool isSafeToDelete(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || - MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || - MI.isLabel() || MI.isDebugValue() || - MI.hasUnmodeledSideEffects()) - return false; - - // Technically speaking inline asm without side effects and no defs can still - // be deleted. But there is so much bad inline asm code out there, we should - // let them be. - if (MI.isInlineAsm()) - return false; - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - // FIXME: We can't remove kill markers or else the scavenger will assert. - // An alternative is to add a ADD pseudo instruction to replace kill - // markers. - return false; - } - return true; -} - -/// TransferDeadness - A identity copy definition is dead and it's being -/// removed. Find the last def or use and mark it as dead / kill. 
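SpillRegToStackSlot above also performs a simple form of dead store elimination: if the last store to the same stack slot has not been read back by the time another store to that slot is emitted, the earlier store is removed (this is the invariant MaybeDeadStores tracks throughout the rewriter). The sketch below captures only that invariant with toy instruction ids and no machine IR; the TransferDeadness body described by the preceding comment follows right after it.

#include <iostream>
#include <map>
#include <set>

// Toy version of the MaybeDeadStores idea: remember the last store to each
// stack slot; if the slot is stored again before anyone reloads from it,
// the earlier store was dead.
struct ToyDeadStoreTracker {
  std::map<int, int> LastStore;      // slot -> instruction id of pending store
  std::set<int> DeadStores;          // instruction ids proven dead

  void noteStore(int Slot, int InstrId) {
    std::map<int, int>::iterator It = LastStore.find(Slot);
    if (It != LastStore.end())
      DeadStores.insert(It->second); // previous store was never read: dead
    LastStore[Slot] = InstrId;
  }

  void noteReload(int Slot) {
    LastStore.erase(Slot);           // the pending store has a reader now
  }
};

int main() {
  ToyDeadStoreTracker T;
  T.noteStore(/*Slot=*/1, /*InstrId=*/10);
  T.noteStore(1, 20);                // store #10 was dead
  T.noteReload(1);
  T.noteStore(1, 30);                // store #20 had a reader, so it stays
  std::cout << "dead stores: " << T.DeadStores.size() << "\n";  // prints 1
  return 0;
}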
-void LocalRewriter:: -TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - SmallPtrSet<MachineInstr*, 4> Seens; - SmallVector<std::pair<MachineInstr*, int>,8> Refs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->isDebugValue() || UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) - continue; - if (Seens.insert(UDMI)) - Refs.push_back(std::make_pair(UDMI, DI->second)); - } - - if (Refs.empty()) - return; - std::sort(Refs.begin(), Refs.end(), RefSorter()); - - while (!Refs.empty()) { - MachineInstr *LastUDMI = Refs.back().first; - Refs.pop_back(); - - MachineOperand *LastUD = NULL; - for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = LastUDMI->getOperand(i); - if (!MO.isReg() || MO.getReg() != Reg) - continue; - if (!LastUD || (LastUD->isUse() && MO.isDef())) - LastUD = &MO; - if (LastUDMI->isRegTiedToDefOperand(i)) - break; - } - if (LastUD->isDef()) { - // If the instruction has no side effect, delete it and propagate - // backward further. Otherwise, mark is dead and we are done. - if (!isSafeToDelete(*LastUDMI)) { - LastUD->setIsDead(); - break; - } - EraseInstr(LastUDMI); - } else { - LastUD->setIsKill(); - RegKills.set(Reg); - KillOps[Reg] = LastUD; - break; - } - } -} - -/// InsertEmergencySpills - Insert emergency spills before MI if requested by -/// VRM. Return true if spills were inserted. -bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { - if (!VRM->hasEmergencySpills(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - SmallSet<int, 4> UsedSS; - std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI); - for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { - unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); - assert(RC && "Unable to determine register class!"); - int SS = VRM->getEmergencySpillSlot(RC); - if (UsedSS.count(SS)) - llvm_unreachable("Need to spill more than one physical registers!"); - UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(MII); - VRM->addSpillSlotUse(SS, StoreMI); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, - TII, *MBB->getParent()); - - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); - - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SS, LoadMI); - ++NumPSpills; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - return true; -} - -/// InsertRestores - Restore registers before MI is requested by VRM. Return -/// true is any instructions were inserted. -bool LocalRewriter::InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (!VRM->isRestorePt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI); - for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { - unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - - // Check if the value being restored if available. If so, it must be - // from a predecessor BB that fallthrough into this BB. We do not - // expect: - // BB1: - // r1 = load fi#1 - // ... - // = r1<kill> - // ... # r1 not clobbered - // ... - // = load fi#1 - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - if (InReg == Phys) { - // If the value is already available in the expected register, save - // a reload / remat. - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" instead of reloading into physreg " - << TRI->getName(Phys) << '\n'); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to update - // the kill flags for the current instruction after processing it. - - ++NumOmitted; - continue; - } else if (InReg && InReg != Phys) { - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" by copying it into physreg " - << TRI->getName(Phys) << '\n'); - - // If the reloaded / remat value is available in another register, - // copy it to the desired register. - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), Phys) - .addReg(InReg, RegState::Kill); - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - DEBUG(dbgs() << '\t' << *CopyMI); - ++NumCopified; - continue; - } - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - - if (VRM->isReMaterialized(VirtReg)) { - ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(MII)); - } - return true; -} - -/// InsertSpills - Insert spills after MI if requested by VRM. Return -/// true if spills were inserted. 
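InsertRestores above picks between three outcomes for each split live range at a restore point: omit the restore when the value already sits in the desired physreg, emit a register copy when it is available in some other physreg, and fall back to a real reload or remat otherwise. The sketch below condenses that decision into one function over a toy availability map with made-up slot and register numbers; the InsertSpills body described by the preceding comment follows right after it.

#include <iostream>
#include <map>

// Toy version of the restore decision: given the physreg a split range must
// live in and which physreg (if any) already holds the value, decide whether
// to do nothing, emit a copy, or emit a real reload / remat.
enum RestoreKind { OmitRestore, CopyFromOtherReg, ReloadOrRemat };

static RestoreKind chooseRestore(unsigned WantedPhysReg, int SlotOrRemat,
                                 const std::map<int, unsigned> &AvailableIn) {
  std::map<int, unsigned>::const_iterator It = AvailableIn.find(SlotOrRemat);
  if (It == AvailableIn.end())
    return ReloadOrRemat;            // value is nowhere in a register
  if (It->second == WantedPhysReg)
    return OmitRestore;              // already where it needs to be
  return CopyFromOtherReg;           // cheaper copy instead of a memory load
}

int main() {
  std::map<int, unsigned> AvailableIn;   // slot/remat id -> physreg holding it
  AvailableIn[4] = 2;
  const char *Names[] = { "omit", "copy", "reload" };
  std::cout << Names[chooseRestore(2, 4, AvailableIn)] << "\n";  // omit
  std::cout << Names[chooseRestore(5, 4, AvailableIn)] << "\n";  // copy
  std::cout << Names[chooseRestore(2, 9, AvailableIn)] << "\n";  // reload
  return 0;
}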
-bool LocalRewriter::InsertSpills(MachineInstr *MI) { - if (!VRM->isSpillPt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<std::pair<unsigned,bool> > &SpillRegs = - VRM->getSpillPtSpills(MI); - for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { - unsigned VirtReg = SpillRegs[i].first; - bool isKill = SpillRegs[i].second; - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - unsigned Phys = VRM->getPhys(VirtReg); - int StackSlot = VRM->getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC, TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - } - return true; -} - - -/// ProcessUses - Process all of MI's spilled operands and all available -/// operands. -void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps) { - // Clear kill info. - SmallSet<unsigned, 2> KilledMIRegs; - SmallVector<unsigned, 4> VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - - // A partial def causes problems because the same operand both reads and - // writes the register. This rewriter is designed to rewrite uses and defs - // separately, so a partial def would already have been rewritten to a - // physreg by the time we get to processing defs. - // Add an implicit use operand to model the partial def. - if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && - MI.findRegisterUseOperandIdx(VirtReg) == -1) { - VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); - MI.addOperand(MachineOperand::CreateReg(VirtReg, - false, // isDef - true)); // isImplicit - DEBUG(dbgs() << "Partial redef: " << MI); - } - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector<int, 2> PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) << ": " << MOk - << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! 
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) - << " instead of reloading into " - << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to - // update the kill flags for the current instr after processing it. - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. - ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. 
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } -} - -/// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avoid reloading vregs. -void -LocalRewriter::RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" - << MBB->getName() << "':\n"); - - MachineFunction &MF = *MBB->getParent(); - - // MaybeDeadStores - When we need to write a value back into a stack slot, - // keep track of the inserted store. If the stack slot value is never read - // (because the value was used from some available register, for example), and - // subsequently stored to, the original store is dead. This map keeps track - // of inserted stores that are not used. If we see a subsequent store to the - // same stack slot, the original store is deleted. - std::vector<MachineInstr*> MaybeDeadStores; - MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL); - - // ReMatDefs - These are rematerializable def MIs which are not deleted. - SmallSet<MachineInstr*, 4> ReMatDefs; - - // Keep track of the registers we have already spilled in case there are - // multiple defs of the same register in MI. - SmallSet<unsigned, 8> SpilledMIRegs; - - RegKills.reset(); - KillOps.clear(); - KillOps.resize(TRI->getNumRegs(), NULL); - - DistanceMap.clear(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ) { - MachineBasicBlock::iterator NextMII = llvm::next(MII); - - if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps)) - NextMII = llvm::next(MII); - - if (InsertEmergencySpills(MII)) - NextMII = llvm::next(MII); - - InsertRestores(MII, Spills, RegKills, KillOps); - - if (InsertSpills(MII)) - NextMII = llvm::next(MII); - - bool Erased = false; - bool BackTracked = false; - MachineInstr &MI = *MII; - - // Remember DbgValue's which reference stack slots. - if (MI.isDebugValue() && MI.getOperand(0).isFI()) - Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); - - /// ReusedOperands - Keep track of operand reuse in case we need to undo - /// reuse. - ReuseInfo ReusedOperands(MI, TRI); - - ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); - - DEBUG(dbgs() << '\t' << MI); - - - // If we have folded references to memory operands, make sure we clear all - // physical registers that may contain the value of the spilled virtual - // register - - // Copy the folded virts to a small vector, we may change MI2VirtMap. - SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts; - // C++0x FTW! 
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator, - VirtRegMap::MI2VirtMapTy::const_iterator> FVRange = - VRM->getFoldedVirts(&MI); - FVRange.first != FVRange.second; ++FVRange.first) - FoldedVirts.push_back(FVRange.first->second); - - SmallSet<int, 2> FoldedSS; - for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { - unsigned VirtReg = FoldedVirts[FVI].first; - VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); - - int SS = VRM->getStackSlot(VirtReg); - if (SS == VirtRegMap::NO_STACK_SLOT) - continue; - FoldedSS.insert(SS); - DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); - - // If this folded instruction is just a use, check to see if it's a - // straight load from the virt reg slot. - if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); - if (DestReg && FrameIdx == SS) { - // If this spill slot is available, turn it into a copy (or nothing) - // instead of leaving it as a load! - if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(dbgs() << "Promoted Load To Copy: " << MI); - if (DestReg != InReg) { - MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DestReg, RegState::Define, DefMO->getSubReg()) - .addReg(InReg, RegState::Kill); - // Revisit the copy so we make sure to notice the effects of the - // operation on the destreg (either needing to RA it if it's - // virtual or needing to clobber any values if it's physical). - NextMII = CopyMI; - NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - BackTracked = true; - } else { - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // InvalidateKills resurrects any prior kill of the copy's source - // allowing the source reg to be reused in place of the copy. - Spills.disallowClobberPhysReg(InReg); - } - - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - goto ProcessNextInst; - } - } else { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ - MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - goto ProcessNextInst; - } - } - } - - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - MachineInstr* DeadStore = MaybeDeadStores[SS]; - if (DeadStore) { - bool isDead = !(MR & VirtRegMap::isRef); - MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isModRef) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - // We can reuse this physreg as long as we are allowed to clobber - // the value and there isn't an earlier def that has already clobbered - // the physreg. - if (PhysReg && - !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg) && - !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! - MachineOperand *KillOpnd = - DeadStore->findRegisterUseOperand(PhysReg, true); - // Note, if the store is storing a sub-register, it's possible the - // super-register is needed below. 
- if (KillOpnd && !KillOpnd->getSubReg() && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ - MBB->insert(MII, NewMIs[0]); - NewStore = NewMIs[1]; - MBB->insert(MII, NewStore); - VRM->addSpillSlotUse(SS, NewStore); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - isDead = true; - ++NumSUnfold; - } - } - } - - if (isDead) { // Previous store is dead. - // If we get here, the store is dead, nuke it now. - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - if (!NewStore) - ++NumDSE; - } - - MaybeDeadStores[SS] = NULL; - if (NewStore) { - // Treat this store as a spill merged into a copy. That makes the - // stack slot value available. - VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); - goto ProcessNextInst; - } - } - - // If the spill slot value is available, and this is a new definition of - // the value, the value is not available anymore. - if (MR & VirtRegMap::isMod) { - // Notice that the value in this stack slot has been modified. - Spills.ModifyStackSlotOrReMat(SS); - - // If this is *just* a mod of the value, check to see if this is just a - // store to the spill slot (i.e. the spill got merged into the copy). If - // so, realize that the vreg is available now, and add the store to the - // MaybeDeadStore info. - int StackSlot; - if (!(MR & VirtRegMap::isRef)) { - if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Src hasn't been allocated yet?"); - - if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, - Spills, RegKills, KillOps, TRI)) { - NextMII = llvm::next(MII); - BackTracked = true; - goto ProcessNextInst; - } - - // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark - // this as a potentially dead store in case there is a subsequent - // store into the stack slot without a read from it. - MaybeDeadStores[StackSlot] = &MI; - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register - // available in PhysReg. - Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); - } - } - } - } - - // Process all of the spilled defs. - SpilledMIRegs.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!(MO.isReg() && MO.getReg() && MO.isDef())) - continue; - - unsigned VirtReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - // Also check if it's copying from an "undef", if so, we can't - // eliminate this or else the undef marker is lost and it will - // confuses the scavenger. This is extremely rare. - if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && - MI.getNumOperands() == 2) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); - // Last def is now dead. 
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); - } - EraseInstr(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } - - // If it's not a no-op copy, it clobbers the value in the destreg. - Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! - Spills.addAvailable(FrameIdx, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; - } - - unsigned SubIdx = MO.getSubReg(); - bool DoReMat = VRM->isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM->getStackSlot(VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - unsigned TiedOp; - if (MI.isRegTiedToUseOperand(i, &TiedOp)) { - PhysReg = MI.getOperand(TiedOp).getReg(); - if (SubIdx) { - unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); - assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && - "Can't find corresponding super-register!"); - PhysReg = SuperReg; - } - } else { - PhysReg = VRM->getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - } - } - - // If StackSlot is available in a register that also holds other stack - // slots, clobber those stack slots now. - Spills.ClobberSharingStackSlots(StackSlot); - - assert(PhysReg && "VR not assigned a physical register?"); - MRI->setPhysRegUsed(PhysReg); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - ReusedOperands.markClobbered(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, - LastStore, Spills, ReMatDefs, RegKills, KillOps); - NextMII = llvm::next(MII); - - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - if (MI.isIdentityCopy()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - ProcessNextInst: - // Delete dead instructions without side effects. 
- if (!Erased && !BackTracked && isSafeToDelete(MI)) { - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - } - if (!Erased) - DistanceMap.insert(std::make_pair(&MI, DistanceMap.size())); - if (!Erased && !BackTracked) { - for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) - UpdateKills(*II, TRI, RegKills, KillOps); - } - MII = NextMII; - } - -} - -llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { - switch (RewriterOpt) { - default: llvm_unreachable("Unreachable!"); - case local: - return new LocalRewriter(); - case trivial: - return new TrivialRewriter(); - } -} diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h deleted file mode 100644 index 93474e0..0000000 --- a/lib/CodeGen/VirtRegRewriter.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H -#define LLVM_CODEGEN_VIRTREGREWRITER_H - -namespace llvm { - class LiveIntervals; - class MachineFunction; - class VirtRegMap; - - /// VirtRegRewriter interface: Implementations of this interface assign - /// spilled virtual registers to stack slots, rewriting the code. - struct VirtRegRewriter { - virtual ~VirtRegRewriter(); - virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) = 0; - }; - - /// createVirtRegRewriter - Create an return a rewriter object, as specified - /// on the command line. - VirtRegRewriter* createVirtRegRewriter(); - -} - -#endif diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index 92aa76a..813ccce 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -12,10 +12,11 @@ add_llvm_library(LLVMJIT ) add_llvm_library_dependencies(LLVMJIT + LLVMCodeGen LLVMCore LLVMExecutionEngine + LLVMMC LLVMRuntimeDyld LLVMSupport LLVMTarget - LLVMCodeGen ) diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt index b974713..21cb300 100644 --- a/lib/ExecutionEngine/JIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = JIT parent = ExecutionEngine -required_libraries = CodeGen Core ExecutionEngine MC Support Target +required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index c17a397..58f9100 100644 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -39,9 +39,9 @@ public: if (Name[0] == '_') ++Name; Function *F = M->getFunction(Name); // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. - if (!F && Name[0] == '-') + // the symbol and it's of the ObjC conventions (starts with "-" or + // "+"), try prepending a \01 and see if we can find it that way. 
+ if (!F && (Name[0] == '-' || Name[0] == '+')) F = M->getFunction((Twine("\1") + Name).str()); assert(F && "No matching function in JIT IR Module!"); return JMM->startFunctionBody(F, Size); @@ -56,9 +56,9 @@ public: if (Name[0] == '_') ++Name; Function *F = M->getFunction(Name); // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. - if (!F && Name[0] == '-') + // the symbol and it's of the ObjC conventions (starts with "-" or + // "+"), try prepending a \01 and see if we can find it that way. + if (!F && (Name[0] == '-' || Name[0] == '+')) F = M->getFunction((Twine("\1") + Name).str()); assert(F && "No matching function in JIT IR Module!"); JMM->endFunctionBody(F, FunctionStart, FunctionEnd); diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt index acc8dff..8ad66b6 100644 --- a/lib/MC/LLVMBuild.txt +++ b/lib/MC/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MC parent = Libraries -required_libraries = Support +required_libraries = Object Support diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index 4debb28..5cf5f1b 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -10,21 +10,4 @@ add_llvm_library_dependencies(LLVMMCDisassembler LLVMMC LLVMMCParser LLVMSupport - LLVMTarget ) - -foreach(t ${LLVM_TARGETS_TO_BUILD}) - set(td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}) - if(EXISTS ${td}/TargetInfo/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Info") - endif() - if(EXISTS ${td}/MCTargetDesc/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Desc") - endif() - if(EXISTS ${td}/AsmParser/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}AsmParser") - endif() - if(EXISTS ${td}/Disassembler/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Disassembler") - endif() -endforeach(t) diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 16e66dc..f156760 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -18,7 +18,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" namespace llvm { class Target; @@ -35,12 +34,6 @@ using namespace llvm; LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { - // Initialize targets and assembly printers/parsers. - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - llvm::InitializeAllDisassemblers(); - // Get the target. 
std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 5633cb1..3540334 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -34,10 +34,8 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" using namespace llvm; -bool EDDisassembler::sInitialized = false; EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; struct TripleMap { @@ -98,20 +96,6 @@ static int getLLVMSyntaxVariant(Triple::ArchType arch, } } -void EDDisassembler::initialize() { - if (sInitialized) - return; - - sInitialized = true; - - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - InitializeAllDisassemblers(); -} - -#undef BRINGUP_TARGET - EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, AssemblySyntax syntax) { CPUKey key; diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index 38c2203..97c2d1f 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -94,8 +94,6 @@ struct EDDisassembler { typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t; - /// True if the disassembler registry has been initialized; false if not - static bool sInitialized; /// A map from disassembler specifications to disassemblers. Populated /// lazily. static DisassemblerMap_t sDisassemblers; @@ -116,9 +114,6 @@ struct EDDisassembler { static EDDisassembler *getDisassembler(llvm::StringRef str, AssemblySyntax syntax); - /// initialize - Initializes the disassembler registry and the LLVM backend - static void initialize(); - //////////////////////// // Per-object members // //////////////////////// diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt index 7b8c7ff..d73c6ad 100644 --- a/lib/MC/MCDisassembler/LLVMBuild.txt +++ b/lib/MC/MCDisassembler/LLVMBuild.txt @@ -19,7 +19,4 @@ type = Library name = MCDisassembler parent = MC -; FIXME: This is really horrible, MCDisassembler should not in and of its own -; accord depending on every target. -required_libraries = all-targets MC MCParser Support - +required_libraries = MC MCParser Support diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index b68fcaf..d2bbd7d 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1011,8 +1011,8 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, // Emit the compact unwind info if available. // FIXME: This emits both the compact unwind and the old CIE/FDE // information. Only one of those is needed. - // FIXME: Disable. This is causing failures in the test suite. - if (IsEH && MOFI->getCompactUnwindSection()) + // FIXME: Disable. This seems to still be causing failures. 
+ if (false && IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); if (Frame.CompactUnwindEncoding) diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index 44c0083..20fbb85 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = Object parent = Libraries -required_libraries = Support +required_libraries = Core Support diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index c767c15..098cccb 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -27,8 +27,15 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), if (Ptr == 0) { void* tmp = Creator ? Creator() : 0; + TsanHappensBefore(this); sys::MemoryFence(); + + // This write is racy against the first read in the ManagedStatic + // accessors. The race is benign because it does a second read after a + // memory fence, at which point it isn't possible to get a partial value. + TsanIgnoreWritesBegin(); Ptr = tmp; + TsanIgnoreWritesEnd(); DeleterFn = Deleter; // Add to list of managed statics. @@ -72,4 +79,3 @@ void llvm::llvm_shutdown() { if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } - diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index 7034485..078d705 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -52,3 +52,14 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { } #endif // !HAVE_VALGRIND_VALGRIND_H + +// These functions require no implementation, tsan just looks at the arguments +// they're called with. +extern "C" { +void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv) {} +void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv) {} +void AnnotateIgnoreWritesBegin(const char *file, int line) {} +void AnnotateIgnoreWritesEnd(const char *file, int line) {} +} diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 92559d1..7c2ee22 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1699,7 +1699,7 @@ void Record::checkName() { assert(TypedName && "Record name is not typed!"); RecTy *Type = TypedName->getType(); if (dynamic_cast<StringRecTy *>(Type) == 0) { - llvm_unreachable("Record name is not a string!"); + throw "Record name is not a string!"; } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 5f7b8b2..fb7d96a 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -272,16 +272,16 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, { ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false}, { ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, { 
ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false}, { ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, { ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false}, { ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, { ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 030fab1..4df084f 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -148,6 +148,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); #include "ARMGenFastISel.inc" @@ -164,7 +166,8 @@ class ARMFastISel : public FastISel { bool SelectFPToSI(const Instruction *I); bool SelectSDiv(const Instruction *I); bool SelectSRem(const Instruction *I); - bool SelectCall(const Instruction *I); + bool SelectCall(const Instruction *I, const char *IntrMemName); + bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); @@ -176,10 +179,14 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt, + bool allocReg); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT); + void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + bool ARMIsMemCpySmall(uint64_t Len); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); @@ -212,7 +219,7 @@ class ARMFastISel : public FastISel { const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags); + unsigned Flags, bool useAM3); }; } // end anonymous namespace @@ -563,9 +570,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { // Use MVN to emit negative constants. if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) { unsigned Imm = (unsigned)~(CI->getSExtValue()); - bool EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : (ARM_AM::getSOImmVal(Imm) != -1); - if (EncodeImm) { + if (UseImm) { unsigned Opc = isThumb2 ? 
ARM::t2MVNi : ARM::MVNi; unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -723,7 +730,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. - if (VT == MVT::i8 || VT == MVT::i16) + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) return true; return false; @@ -852,7 +859,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { +void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { assert(VT.isSimple() && "Non-simple types are invalid here!"); @@ -860,21 +867,22 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unhandled load/store type!"); - case MVT::i16: - if (isThumb2) - // Integer loads/stores handle 12-bit offsets. - needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); - else - // ARM i16 integer loads/stores handle +/-imm8 offsets. - // FIXME: Negative offsets require special handling. - if (Addr.Offset > 255 || Addr.Offset < 0) - needsLowering = true; break; case MVT::i1: case MVT::i8: + case MVT::i16: case MVT::i32: - // Integer loads/stores handle 12-bit offsets. - needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + if (!useAM3) { + // Integer loads/stores handle 12-bit offsets. + needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + // Handle negative offsets. + if (needsLowering && isThumb2) + needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 && + Addr.Offset > -256); + } else { + // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. + needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); + } break; case MVT::f32: case MVT::f64: @@ -910,7 +918,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags) { + unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. if (VT.getSimpleVT().SimpleTy == MVT::f32 || @@ -930,41 +938,78 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword load/stores need an additional operand. - if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + // ARM halfword load/stores and signed byte loads need an additional operand. + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; + MIB.addReg(0); + MIB.addImm(Imm); + } else { + MIB.addImm(Addr.Offset); + } MIB.addMemOperand(MMO); } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores need an additional operand. - if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + // ARM halfword load/stores and signed byte loads need an additional operand. + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset; + MIB.addReg(0); + MIB.addImm(Imm); + } else { + MIB.addImm(Addr.Offset); + } } AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { - +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + bool isZExt = true, bool allocReg = true) { assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; - TargetRegisterClass *RC; + bool useAM3 = false; + TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; + case MVT::i1: case MVT::i8: - Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8; + else + Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12; + } else { + if (isZExt) { + Opc = ARM::LDRBi12; + } else { + Opc = ARM::LDRSB; + useAM3 = true; + } + } RC = ARM::GPRRegisterClass; break; case MVT::i16: - Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8; + else + Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12; + } else { + Opc = isZExt ? ARM::LDRH : ARM::LDRSH; + useAM3 = true; + } RC = ARM::GPRRegisterClass; break; case MVT::i32: - Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = ARM::t2LDRi8; + else + Opc = ARM::t2LDRi12; + } else { + Opc = ARM::LDRi12; + } RC = ARM::GPRRegisterClass; break; case MVT::f32: @@ -977,13 +1022,15 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { break; } // Simplify this down to something we can handle. - ARMSimplifyAddress(Addr, VT); + ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); + if (allocReg) + ResultReg = createResultReg(RC); + assert (ResultReg > 255 && "Expected an allocated virtual register."); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); - AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad); + AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); return true; } @@ -1009,6 +1056,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { unsigned StrOpc; + bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; @@ -1022,13 +1070,35 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { SrcReg = Res; } // Fallthrough here. case MVT::i8: - StrOpc = isThumb2 ? ARM::t2STRBi12 : ARM::STRBi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRBi8; + else + StrOpc = ARM::t2STRBi12; + } else { + StrOpc = ARM::STRBi12; + } break; case MVT::i16: - StrOpc = isThumb2 ? ARM::t2STRHi12 : ARM::STRH; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRHi8; + else + StrOpc = ARM::t2STRHi12; + } else { + StrOpc = ARM::STRH; + useAM3 = true; + } break; case MVT::i32: - StrOpc = isThumb2 ? 
ARM::t2STRi12 : ARM::STRi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRi8; + else + StrOpc = ARM::t2STRi12; + } else { + StrOpc = ARM::STRi12; + } break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; @@ -1040,13 +1110,13 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; } // Simplify this down to something we can handle. - ARMSimplifyAddress(Addr, VT); + ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) .addReg(SrcReg, getKillRegState(true)); - AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore); + AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); return true; } @@ -1231,25 +1301,25 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Check to see if the 2nd operand is a constant that we can encode directly // in the compare. - int EncodedImm = 0; - bool EncodeImm = false; + int Imm = 0; + bool UseImm = false; bool isNegativeImm = false; if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); - EncodedImm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); - if (EncodedImm < 0) { + Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); + if (Imm < 0) { isNegativeImm = true; - EncodedImm = -EncodedImm; + Imm = -Imm; } - EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) : - (ARM_AM::getSOImmVal(EncodedImm) != -1); + UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); } } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { if (SrcVT == MVT::f32 || SrcVT == MVT::f64) if (ConstFP->isZero() && !ConstFP->isNegative()) - EncodeImm = true; + UseImm = true; } unsigned CmpOpc; @@ -1260,11 +1330,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES; + CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: isICmp = false; - CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED; + CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: case MVT::i8: @@ -1273,12 +1343,12 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Intentional fall-through. case MVT::i32: if (isThumb2) { - if (!EncodeImm) + if (!UseImm) CmpOpc = ARM::t2CMPrr; else CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; } else { - if (!EncodeImm) + if (!UseImm) CmpOpc = ARM::CMPrr; else CmpOpc = isNegativeImm ? 
ARM::CMNzri : ARM::CMPri; @@ -1290,7 +1360,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (SrcReg1 == 0) return false; unsigned SrcReg2; - if (!EncodeImm) { + if (!UseImm) { SrcReg2 = getRegForValue(Src2Value); if (SrcReg2 == 0) return false; } @@ -1301,14 +1371,14 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg1 = ResultReg; - if (!EncodeImm) { + if (!UseImm) { ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg2 = ResultReg; } } - if (!EncodeImm) { + if (!UseImm) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(SrcReg1).addReg(SrcReg2)); @@ -1319,7 +1389,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. if (isICmp) - MIB.addImm(EncodedImm); + MIB.addImm(Imm); AddOptionalDefs(MIB); } @@ -1490,17 +1560,49 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { if (CondReg == 0) return false; unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - unsigned Op2Reg = getRegForValue(I->getOperand(2)); - if (Op2Reg == 0) return false; - unsigned CmpOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; + // Check to see if we can use an immediate in the conditional move. + int Imm = 0; + bool UseImm = false; + bool isNegativeImm = false; + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { + assert (VT == MVT::i32 && "Expecting an i32."); + Imm = (int)ConstInt->getValue().getZExtValue(); + if (Imm < 0) { + isNegativeImm = true; + Imm = ~Imm; + } + UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); + } + + unsigned Op2Reg; + if (!UseImm) { + Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + } + + unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CondReg).addImm(1)); + .addReg(CondReg).addImm(0)); + + unsigned MovCCOpc; + if (!UseImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; + } else { + if (!isNegativeImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; + } else { + MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; + } + } unsigned ResultReg = createResultReg(RC); - unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) - .addReg(Op1Reg).addReg(Op2Reg) - .addImm(ARMCC::EQ).addReg(ARM::CPSR); + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); UpdateValueMap(I, ResultReg); return true; } @@ -1964,12 +2066,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return true; } -bool ARMFastISel::SelectCall(const Instruction *I) { +bool ARMFastISel::SelectCall(const Instruction *I, + const char *IntrMemName = 0) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); - // Can't handle inline asm or worry about intrinsics yet. 
- if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; + // Can't handle inline asm. + if (isa<InlineAsm>(Callee)) return false; // Only handle global variable Callees. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); @@ -2011,8 +2114,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) { ArgFlags.reserve(CS.arg_size()); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); + // If we're lowering a memory intrinsic instead of a regular call, skip the + // last two arguments, which shouldn't be passed to the underlying function. + if (IntrMemName && e-i <= 2) + break; + unsigned Arg = getRegForValue(*i); if (Arg == 0) return false; ISD::ArgFlagsTy Flags; @@ -2054,17 +2161,26 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb2) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addGlobalAddress(GV, 0, 0); - else + if(isThumb2) { // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); - + TII.get(CallOpc))); + if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); + } else { + if (!IntrMemName) + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addGlobalAddress(GV, 0, 0)); + else + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addExternalSymbol(IntrMemName, 0)); + } + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2079,6 +2195,98 @@ bool ARMFastISel::SelectCall(const Instruction *I) { return true; } +bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { + return Len <= 16; +} + +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) { + // Make sure we don't bloat code by inlining very large memcpy's. + if (!ARMIsMemCpySmall(Len)) + return false; + + // We don't care about alignment here since we just emit integer accesses. + while (Len) { + MVT VT; + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert(Len == 1); + VT = MVT::i8; + } + + bool RV; + unsigned ResultReg; + RV = ARMEmitLoad(VT, ResultReg, Src); + assert (RV = true && "Should be able to handle this load."); + RV = ARMEmitStore(VT, ResultReg, Dest); + assert (RV = true && "Should be able to handle this store."); + + unsigned Size = VT.getSizeInBits()/8; + Len -= Size; + Dest.Offset += Size; + Src.Offset += Size; + } + + return true; +} + +bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { + // FIXME: Handle more intrinsics. + switch (I.getIntrinsicID()) { + default: return false; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + const MemTransferInst &MTI = cast<MemTransferInst>(I); + // Don't handle volatile. + if (MTI.isVolatile()) + return false; + + // Disable inlining for memmove before calls to ComputeAddress. Otherwise, + // we would emit dead code because we don't currently handle memmoves. 
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); + if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) { + // Small memcpy's are common enough that we want to do them without a call + // if possible. + uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue(); + if (ARMIsMemCpySmall(Len)) { + Address Dest, Src; + if (!ARMComputeAddress(MTI.getRawDest(), Dest) || + !ARMComputeAddress(MTI.getRawSource(), Src)) + return false; + if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) + return true; + } + } + + if (!MTI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) + return false; + + const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove"; + return SelectCall(&I, IntrMemName); + } + case Intrinsic::memset: { + const MemSetInst &MSI = cast<MemSetInst>(I); + // Don't handle volatile. + if (MSI.isVolatile()) + return false; + + if (!MSI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MSI.getDestAddressSpace() > 255) + return false; + + return SelectCall(&I, "memset"); + } + } + return false; +} + bool ARMFastISel::SelectTrunc(const Instruction *I) { // The high bits for a type smaller than the register size are assumed to be // undefined. @@ -2150,8 +2358,6 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool ARMFastISel::SelectIntExt(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. - // FIXME: We could save an instruction in many cases by special-casing - // load instructions. Type *DestTy = I->getType(); Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); @@ -2202,6 +2408,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::SRem: return SelectSRem(I); case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return SelectIntrinsicCall(*II); return SelectCall(I); case Instruction::Select: return SelectSelect(I); @@ -2217,6 +2425,52 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// successful. +bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + // ldrb r1, [r0] ldrb r1, [r0] + // uxtb r2, r1 => + // mov r3, r2 mov r3, r1 + bool isZExt = true; + switch(MI->getOpcode()) { + default: return false; + case ARM::SXTH: + case ARM::t2SXTH: + isZExt = false; + case ARM::UXTH: + case ARM::t2UXTH: + if (VT != MVT::i16) + return false; + break; + case ARM::SXTB: + case ARM::t2SXTB: + isZExt = false; + case ARM::UXTB: + case ARM::t2UXTB: + if (VT != MVT::i8) + return false; + break; + } + // See if we can handle this address. + Address Addr; + if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false)) + return false; + MI->eraseFromParent(); + return true; +} + namespace llvm { llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { // Completely untested on non-darwin. 
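The ARMFastISel hunks above teach fast-isel to expand small, constant-length memcpy/memmove calls inline (ARMIsMemCpySmall / ARMTryEmitSmallMemCpy) instead of emitting a libcall. As a rough illustration of the width-selection loop those functions use, here is a minimal standalone C++ sketch; it is written against no LLVM APIs, and the function name plus the use of std::memcpy as a stand-in for the emitted load/store pair are assumptions for illustration only, not part of the patch.

// Sketch of the Len >= 4 / Len >= 2 / Len == 1 width selection used by
// ARMTryEmitSmallMemCpy above; the copy is performed directly instead of
// emitting target load and store instructions.
#include <cstdint>
#include <cstring>

static bool copySmallInline(unsigned char *Dst, const unsigned char *Src,
                            uint64_t Len) {
  if (Len > 16)           // same threshold as ARMIsMemCpySmall
    return false;         // caller would fall back to a real memcpy call

  while (Len) {
    uint64_t Size;
    if (Len >= 4)
      Size = 4;           // one 32-bit load + store
    else if (Len >= 2)
      Size = 2;           // one 16-bit load + store
    else
      Size = 1;           // one 8-bit load + store

    std::memcpy(Dst, Src, Size);  // stands in for ARMEmitLoad + ARMEmitStore
    Dst += Size;
    Src += Size;
    Len -= Size;
  }
  return true;
}

The 16-byte cap keeps the unrolled sequence to at most four word-sized load/store pairs, so the inline form stays smaller than the call setup it replaces.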
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6f2b3b8..b55ef70 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -127,6 +127,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index c5bf607..06ee2c8 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { // ARM special operands for disassembly only. // -def SetEndAsmOperand : AsmOperandClass { +def SetEndAsmOperand : ImmAsmOperand { let Name = "SetEndImm"; let ParserMethod = "parseSetEndImm"; } @@ -820,7 +820,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, } // PKH instructions -def PKHLSLAsmOperand : AsmOperandClass { +def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; let ParserMethod = "parsePKHLSLImm"; } @@ -1991,3 +1991,59 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [HasNEON,UseNEONForFP]; } + +// VFP/NEON Instruction aliases for type suffices. +class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : + InstAlias<!strconcat(opc, dt, asm), Result>; +multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> { + def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>; + def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>; + def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>; + def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>; +} +// VFPDT8ReqInstAlias plus plain ".8" +multiclass VFPDT8InstAlias<string opc, string asm, dag Result> { + def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + defm : VFPDT8ReqInstAlias<opc, asm, Result>; +} +multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> { + def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>; + def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>; + def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>; + def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>; +} +// VFPDT16ReqInstAlias plus plain ".16" +multiclass VFPDT16InstAlias<string opc, string asm, dag Result> { + def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>; + defm : VFPDT16ReqInstAlias<opc, asm, Result>; +} +multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> { + def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>; + def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>; + def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>; + def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>; + def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>; +} +// VFPDT32ReqInstAlias plus plain ".32" +multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { + def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + defm : VFPDT32ReqInstAlias<opc, asm, Result>; +} +multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> { + def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, 
Result>; + def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; + def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; + def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>; + def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; +} +// VFPDT64ReqInstAlias plus plain ".64" +multiclass VFPDT64InstAlias<string opc, string asm, dag Result> { + def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; + defm : VFPDT64ReqInstAlias<opc, asm, Result>; +} +multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { + defm : VFPDT8InstAlias<opc, asm, Result>; + defm : VFPDT16InstAlias<opc, asm, Result>; + defm : VFPDT32InstAlias<opc, asm, Result>; + defm : VFPDT64InstAlias<opc, asm, Result>; +} diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index af1f490..770703c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -284,14 +284,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 - An immediate is in the range [0.65535]. -def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 65536; -}]> { - let ParserMatchClass = Imm0_65535AsmOperand; -} - class BinOpWithFlagFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -326,6 +318,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Operand Definitions. // +// Immediate operands with a shared generic asm render method. +class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand<OtherVT> { @@ -496,7 +491,7 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. -def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; } +def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; } def so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { @@ -521,7 +516,7 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; } +def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; }]> { @@ -529,7 +524,7 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; } +def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; }]> { @@ -537,7 +532,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; } +def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> { @@ -545,7 +540,7 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. 
-def Imm0_32AsmOperand: AsmOperandClass { let Name = "Imm0_32"; } +def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; } def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> { @@ -553,25 +548,33 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_255 predicate - Immediate in the range [0,255]. -def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; } +def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { let ParserMatchClass = Imm0_255AsmOperand; } +/// imm0_65535 - An immediate is in the range [0.65535]. +def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; } +def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 65536; +}]> { + let ParserMatchClass = Imm0_65535AsmOperand; +} + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression. // // FIXME: This really needs a Thumb version separate from the ARM version. // While the range is the same, and can thus use the same match class, // the encoding is different so it should have a different encoder method. -def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; } +def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; } def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; let ParserMatchClass = Imm0_65535ExprAsmOperand; } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xffffff; }]> { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d3c4486b..49cc254 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1238,9 +1238,8 @@ class VST1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, - "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; @@ -5180,3 +5179,170 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; + + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +// VAND/VEOR/VORR accept but do not require a type suffix. 
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// VLD1 requires a size suffix, but also accepts type specific variants. +// Load one D register. +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; + +// Load two D registers. +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; + +// Load three D registers. 
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; + + +// Load four D registers. +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; + +// VST1 requires a size suffix, but also accepts type specific variants. +// Store one D register. +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; + +// Store two D registers. 
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; + +// FIXME: The three and four register VST1 instructions haven't been moved +// to the VecList* encoding yet, so we can't do assembly parsing support +// for them. Uncomment these when that happens. +// Load three D registers. +//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; + +// Load four D registers. +//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0a28226..03077c0 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. -def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; } +def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; } def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getT2SOImmVal(Imm) != -1; }]> { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e746cf2..488c508 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -69,11 +69,11 @@ def vfp_f64imm : Operand<f64>, let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), - IIC_fpLoad64, "vldr", ".64\t$Dd, $addr", + IIC_fpLoad64, "vldr", "\t$Dd, $addr", [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), - IIC_fpLoad32, "vldr", ".32\t$Sd, $addr", + IIC_fpLoad32, "vldr", "\t$Sd, $addr", [(set SPR:$Sd, (load addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
@@ -83,11 +83,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), - IIC_fpStore64, "vstr", ".64\t$Dd, $addr", + IIC_fpStore64, "vstr", "\t$Dd, $addr", [(store (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), - IIC_fpStore32, "vstr", ".32\t$Sd, $addr", + IIC_fpStore32, "vstr", "\t$Sd, $addr", [(store SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. @@ -1163,3 +1163,12 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +// VLDR/VSTR accept an optional type suffix. +defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 4c3be89..c8728f4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -1504,6 +1505,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, return AddedRegPressure.size() <= MemRegs.size() * 2; } + +/// Copy Op0 and Op1 operands into a new array assigned to MI. 
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, + MachineInstr *Op1) { + assert(MI->memoperands_empty() && "expected a new machineinstr"); + size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + + (Op1->memoperands_end() - Op1->memoperands_begin()); + + MachineFunction *MF = MI->getParent()->getParent(); + MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); + MachineSDNode::mmo_iterator MemEnd = + std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); + MemEnd = + std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); + MI->setMemRefs(MemBegin, MemEnd); +} + bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, @@ -1698,6 +1716,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) @@ -1710,6 +1730,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumSTRDFormed; } MBB->erase(Op0); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e782975..1d66d12 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -714,7 +714,7 @@ public: bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { - return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift; + return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false) const { if (!isMemory()) @@ -1101,7 +1101,8 @@ public: void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!"); + assert(isRegShiftedReg() && + "addRegShiftedRegOperands() on non RegShiftedReg!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( @@ -1110,7 +1111,8 @@ public: void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); + assert(isRegShiftedImm() && + "addRegShiftedImmOperands() on non RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); Inst.addOperand(MCOperand::CreateImm( ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); @@ -1189,26 +1191,6 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } - void addImm0_255Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_31Operands(MCInst &Inst, 
unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addImm1_16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction @@ -1225,26 +1207,6 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); } - void addImm0_32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_65535Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm24bitOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addImmThumbSROperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate, except for 32, which encodes as @@ -1254,11 +1216,6 @@ public: Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm))); } - void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addPKHASRImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // An ASR value of 32 encodes as 0, so that's how we want to add it to @@ -1268,16 +1225,6 @@ public: Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val)); } - void addARMSOImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addT2SOImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its bitwise @@ -1294,11 +1241,6 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } - void addSetEndImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -1486,8 +1428,9 @@ public: void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, - Memory.ShiftImm, Memory.ShiftType); + unsigned Val = + ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -2410,6 +2353,29 @@ static unsigned getNextRegister(unsigned Reg) { } } +// Return the low-subreg of a given Q register. 
+static unsigned getDRegFromQReg(unsigned QReg) { + switch (QReg) { + default: llvm_unreachable("expected a Q register!"); + case ARM::Q0: return ARM::D0; + case ARM::Q1: return ARM::D2; + case ARM::Q2: return ARM::D4; + case ARM::Q3: return ARM::D6; + case ARM::Q4: return ARM::D8; + case ARM::Q5: return ARM::D10; + case ARM::Q6: return ARM::D12; + case ARM::Q7: return ARM::D14; + case ARM::Q8: return ARM::D16; + case ARM::Q9: return ARM::D19; + case ARM::Q10: return ARM::D20; + case ARM::Q11: return ARM::D22; + case ARM::Q12: return ARM::D24; + case ARM::Q13: return ARM::D26; + case ARM::Q14: return ARM::D28; + case ARM::Q15: return ARM::D30; + } +} + /// Parse a register list. bool ARMAsmParser:: parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -2425,6 +2391,16 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Reg == -1) return Error(RegLoc, "register expected"); + // The reglist instructions have at most 16 registers, so reserve + // space for that many. + SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; + + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + ++Reg; + } const MCRegisterClass *RC; if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; @@ -2435,10 +2411,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { else return Error(RegLoc, "invalid register in register list"); - // The reglist instructions have at most 16 registers, so reserve - // space for that many. - SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; - // Store the first register. + // Store the register. Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); // This starts immediately after the first register token in the list, @@ -2452,6 +2425,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { int EndReg = tryParseRegister(); if (EndReg == -1) return Error(EndLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) + EndReg = getDRegFromQReg(EndReg) + 1; // If the register is the same as the start reg, there's nothing // more to do. if (Reg == EndReg) @@ -2476,6 +2452,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + bool isQReg = false; + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + isQReg = true; + } // The register must be in the same register class as the first. if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); @@ -2489,6 +2471,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + if (isQReg) + Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc)); } SMLoc E = Parser.getTok().getLoc(); @@ -2500,29 +2484,6 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return false; } -// Return the low-subreg of a given Q register. 
-static unsigned getDRegFromQReg(unsigned QReg) { - switch (QReg) { - default: llvm_unreachable("expected a Q register!"); - case ARM::Q0: return ARM::D0; - case ARM::Q1: return ARM::D2; - case ARM::Q2: return ARM::D4; - case ARM::Q3: return ARM::D6; - case ARM::Q4: return ARM::D8; - case ARM::Q5: return ARM::D10; - case ARM::Q6: return ARM::D12; - case ARM::Q7: return ARM::D14; - case ARM::Q8: return ARM::D16; - case ARM::Q9: return ARM::D19; - case ARM::Q10: return ARM::D20; - case ARM::Q11: return ARM::D22; - case ARM::Q12: return ARM::D24; - case ARM::Q13: return ARM::D26; - case ARM::Q14: return ARM::D28; - case ARM::Q15: return ARM::D30; - } -} - // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -4161,6 +4122,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, return false; } +static bool isDataTypeToken(StringRef Tok) { + return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || + Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || + Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" || + Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" || + Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" || + Tok == ".f" || Tok == ".d"; +} + +// FIXME: This bit should probably be handled via an explicit match class +// in the .td files that matches the suffix instead of having it be +// a literal string token the way it is now. +static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { + return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); +} + /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -4265,9 +4242,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); - // For now, we're only parsing Thumb1 (for the most part), so - // just ignore ".n" qualifiers. We'll use them to restrict - // matching when we do Thumb2. + // Some NEON instructions have an optional datatype suffix that is + // completely ignored. Check for that. 
+ if (isDataTypeToken(ExtraToken) && + doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken)) + continue; + if (ExtraToken != ".n") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 6927d2d..0b9b5d0 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -10,13 +10,13 @@ #define DEBUG_TYPE "arm-disassembler" #include "ARM.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" @@ -2267,10 +2267,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Second input register switch (Inst.getOpcode()) { - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: case ARM::VST1d8T: case ARM::VST1d16T: case ARM::VST1d32T: diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index dff57b4..baa9bc3 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMDesc ARMInfo MC Support +required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index adc37cb..f529314 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -12,8 +12,8 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
add_llvm_library_dependencies(LLVMARMDesc - LLVMARMInfo LLVMARMAsmPrinter + LLVMARMInfo LLVMMC LLVMSupport ) diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt index 7d7504f..046c1fc 100644 --- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = ARMInfo parent = ARM -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = ARM diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt index 943fe2d..35752b7 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CBackendInfo parent = CBackend -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CBackend diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt index b5147ae..abc44a2 100644 --- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CellSPUDesc parent = CellSPU -required_libraries = CellSPUInfo MC Support +required_libraries = CellSPUInfo MC add_to_library_groups = CellSPU diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt index 7525359..0710cc3 100644 --- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CellSPUInfo parent = CellSPU -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CellSPU diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt index 7165d8f..738b215 100644 --- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt @@ -6,5 +6,6 @@ add_llvm_library(LLVMCppBackendInfo add_llvm_library_dependencies(LLVMCppBackendInfo LLVMMC + LLVMSupport LLVMTarget ) diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt index b130fee..67a23ba 100644 --- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CppBackendInfo parent = CppBackend -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CppBackend diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 60f5230..358cbc8 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -15,39 +15,39 @@ ; ;===------------------------------------------------------------------------===; -[component_0] -type = Library -name = Target -parent = Libraries -required_libraries = Core MC Support - ; This is a special group whose required libraries are extended (by llvm-build) -; with every built target, which makes it easy for tools to include every -; target. -[component_1] +; with the best execution engine (the native JIT, if available, or the +; interpreter). +[component_0] type = LibraryGroup -name = all-targets +name = Engine parent = Libraries ; This is a special group whose required libraries are extended (by llvm-build) ; with the configured native target, if any. 
-[component_2] +[component_1] type = LibraryGroup name = Native parent = Libraries ; This is a special group whose required libraries are extended (by llvm-build) ; with the configured native code generator, if any. -[component_3] +[component_2] type = LibraryGroup name = NativeCodeGen parent = Libraries +; The component for the actual target library itself. +[component_3] +type = Library +name = Target +parent = Libraries +required_libraries = Core MC Support + ; This is a special group whose required libraries are extended (by llvm-build) -; with the best execution engine (the native JIT, if available, or the -; interpreter). +; with every built target, which makes it easy for tools to include every +; target. [component_4] type = LibraryGroup -name = Engine +name = all-targets parent = Libraries - diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt index 112c64c..e0a53ee 100644 --- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt @@ -14,7 +14,6 @@ set_property( endif() add_llvm_library_dependencies(LLVMMBlazeDisassembler - LLVMMBlazeCodeGen LLVMMBlazeDesc LLVMMBlazeInfo LLVMMC diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt index b2b3a3a..c5c4f80 100644 --- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -19,12 +19,6 @@ type = Library name = MBlazeDisassembler parent = MBlaze -; Strictly speaking, we probably shouldn't have a dependency on -; MBlazeCodeGen. However, given the current factoring we end up including -; MBlazeGenRegisterInfo.inc in the disassembler. Those generated headers end up -; referencing external variables through GPRRegClass, SPRRegClass, and -; CRCRegClass. These aren't actually used, but some compilers may generate -; references to them. 
-required_libraries = MBlazeCodeGen MBlazeDesc MBlazeInfo MC Support +required_libraries = MBlazeDesc MBlazeInfo MC Support add_to_library_groups = MBlaze diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index c3a3833..3087317 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -13,13 +13,12 @@ //===----------------------------------------------------------------------===// #include "MBlaze.h" -#include "MBlazeInstrInfo.h" #include "MBlazeDisassembler.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt index 488c2c7..938a1d9 100644 --- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MBlazeInfo parent = MBlaze -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = MBlaze diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt index 04bd03e..c2dd448 100644 --- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -7,6 +7,8 @@ add_llvm_library_dependencies(LLVMMSP430Desc LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info + LLVMSupport + LLVMTarget ) add_dependencies(LLVMMSP430Desc MSP430CommonTableGen) diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index 5d41082..1890e9d 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MSP430Desc parent = MSP430 -required_libraries = MC MSP430AsmPrinter MSP430Info Support +required_libraries = MC MSP430AsmPrinter MSP430Info Support Target add_to_library_groups = MSP430 diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt index 3bcc826..a745ea8 100644 --- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MSP430Info parent = MSP430 -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = MSP430 diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index e81ba6f..53656d4d 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 4a815f3..f544d39 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -118,7 +118,10 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { OS << Offset; } - if 
(Kind != MCSymbolRefExpr::VK_None) + if ((Kind == MCSymbolRefExpr::VK_Mips_GPOFF_HI) || + (Kind == MCSymbolRefExpr::VK_Mips_GPOFF_LO)) + OS << ")))"; + else if (Kind != MCSymbolRefExpr::VK_None) OS << ')'; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index f190ec4..4f017d0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -1,5 +1,21 @@ +//===-- MipsASMBackend.cpp - ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsAsmBackend and MipsELFObjectWriter classes. +// +//===----------------------------------------------------------------------===// +// + +#include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -8,7 +24,6 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" @@ -16,7 +31,50 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + + // Add/subtract and shift + switch (Kind) { + default: + break; + case Mips::fixup_Mips_PC16: + // So far we are only using this type for branches. + // For branches we start 1 instruction after the branch + // so the displacement will be one instruction size less. + Value -= 4; + // The displacement is then divided by 4 to give us an 18 bit + // address range. + Value >>= 2; + break; + case Mips::fixup_Mips_26: + // So far we are only using this type for jumps. + // The displacement is then divided by 4 to give us an 28 bit + // address range. + Value >>= 2; + break; + } + + // Mask off value for placement as an operand + switch (Kind) { + default: + break; + case FK_Data_4: + Value &= 0xffffffff; + break; + case Mips::fixup_Mips_26: + Value &= 0x03ffffff; + break; + case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_PC16: + Value &= 0x0000ffff; + break; + } + + return Value; +} + namespace { + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, @@ -27,18 +85,75 @@ public: class MipsAsmBackend : public MCAsmBackend { public: - MipsAsmBackend(const Target &T) - : MCAsmBackend() {} - - unsigned getNumFixupKinds() const { - return 1; //tbd - } + MipsAsmBackend(const Target &T) : MCAsmBackend() {} /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { + unsigned Kind = (unsigned)Fixup.getKind(); + Value = adjustFixupValue(Kind, Value); + + if (!Value) + return; // Doesn't change encoding. 
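  // Illustrative walk-through with assumed numbers: a fixup_Mips_PC16 branch
  // whose target sits 32 bytes past the fixup location reaches this point with
  // Value already adjusted above to (32 - 4) >> 2 == 7, i.e. seven
  // instructions beyond the slot following the branch. The loop below then ORs
  // 0x0007 into the low 16 bits of the encoded instruction, one byte at a time
  // in little-endian order.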
+ + unsigned Offset = Fixup.getOffset(); + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case Mips::fixup_Mips_GOT16: // This will be fixed up at link time + break; + case FK_Data_4: + case Mips::fixup_Mips_26: + case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_PC16: + // For each byte of the fragment that the fixup touches, mask in + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + break; + } + } + + unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // MipsFixupKinds.h. + // + // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, + { "fixup_Mips_16", 0, 16, 0 }, + { "fixup_Mips_32", 0, 32, 0 }, + { "fixup_Mips_REL32", 0, 32, 0 }, + { "fixup_Mips_26", 0, 26, 0 }, + { "fixup_Mips_HI16", 0, 16, 0 }, + { "fixup_Mips_LO16", 0, 16, 0 }, + { "fixup_Mips_GPREL16", 0, 16, 0 }, + { "fixup_Mips_LITERAL", 0, 16, 0 }, + { "fixup_Mips_GOT16", 0, 16, 0 }, + { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_CALL16", 0, 16, 0 }, + { "fixup_Mips_GPREL32", 0, 32, 0 }, + { "fixup_Mips_SHIFT5", 6, 5, 0 }, + { "fixup_Mips_SHIFT6", 6, 5, 0 }, + { "fixup_Mips_64", 0, 64, 0 }, + { "fixup_Mips_TLSGD", 0, 16, 0 }, + { "fixup_Mips_GOTTPREL", 0, 16, 0 }, + { "fixup_Mips_TPREL_HI", 0, 16, 0 }, + { "fixup_Mips_TPREL_LO", 0, 16, 0 }, + { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel } + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; } /// @name Target Relaxation Interfaces @@ -52,24 +167,24 @@ public: return false; } - /// RelaxInstruction - Relax the instruction in the given fragment to the next - /// wider instruction. + /// RelaxInstruction - Relax the instruction in the given fragment + /// to the next wider instruction. /// - /// \param Inst - The instruction to relax, which may be the same as the - /// output. + /// \param Inst - The instruction to relax, which may be the same + /// as the output. /// \parm Res [output] - On return, the relaxed instruction. void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { } /// @} - /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given - /// output. If the target cannot generate such a sequence, it should return an - /// error. + /// WriteNopData - Write an (optimal) nop sequence of Count bytes + /// to the given output. If the target cannot generate such a sequence, + /// it should return an error. /// /// \return - True on success.
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { - return false; + return true; } }; @@ -106,7 +221,7 @@ public: return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); } }; -} +} // namespace MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index f7a6fa9..cebfde0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -19,6 +19,88 @@ #include "llvm/Support/ErrorHandling.h" namespace llvm { + +/// MipsII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MipsII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // Mips Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + + /// MO_TLSGD - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (General + // Dynamic TLS). + MO_TLSGD, + + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial + // Exec TLS). + MO_GOTTPREL, + + /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from + // the thread pointer (Local Exec TLS). + MO_TPREL_HI, + MO_TPREL_LO, + + // N32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Mips instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. + FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + + FormMask = 15 + }; +} + + /// getMipsRegisterNumbering - Given the enum value for some register, /// return the number that it corresponds to. 
inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index d66de23..1115fec 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -12,16 +12,18 @@ //===----------------------------------------------------------------------===// // #define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsFixupKinds.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" -#include "MCTargetDesc/MipsMCTargetDesc.h" using namespace llvm; @@ -31,22 +33,217 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + MCContext &Ctx; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti) {} + MCContext &ctx) : MCII(mcii), STI(sti) , Ctx(ctx) {} ~MipsMCCodeEmitter() {} - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const { + // Output the instruction encoding in little endian byte order. + for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, OS); + Val >>= 8; + } } + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + unsigned getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getBranchJumpOpValue - Return binary encoding of the jump + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getBranchTargetOpValue - Return binary encoding of the branch + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getMachineOpValue - Return binary encoding of operand. If the machin + // operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + }; // class MipsMCCodeEmitter } // namespace MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, - MCContext &Ctx) { + MCContext &Ctx) +{ return new MipsMCCodeEmitter(MCII, STI, Ctx); } + +/// EncodeInstruction - Emit the instruction. +/// Size the instruction (currently only 4 bytes +void MipsMCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const +{ + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + // Check for unimplemented opcodes. + // Unfortunately in MIPS both NOT and SLL will come in with Binary == 0 + // so we have to special check for them. + unsigned Opcode = MI.getOpcode(); + if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary) + llvm_unreachable("unimplemented opcode in EncodeInstruction()"); + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! + if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo) + llvm_unreachable("Pseudo opcode found in EncodeInstruction()"); + + // For now all instructions are 4 bytes + int Size = 4; // FIXME: Have Desc.getSize() return the correct value! + + EmitInstruction(Binary, Size, OS); +} + +/// getBranchTargetOpValue - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_Mips_PC16))); + return 0; +} + +/// getJumpTargetOpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_Mips_26))); + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
+unsigned MipsMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = getMipsRegisterNumbering(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } else if (MO.isFPImm()) { + return static_cast<unsigned>(APFloat(MO.getFPImm()) + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + } else if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::SymbolRef) { + Mips::Fixups FixupKind = Mips::fixup_Mips_NONE; + MCSymbolRefExpr::VariantKind SymRefKind = + cast<MCSymbolRefExpr>(Expr)->getKind(); + switch(SymRefKind) { + case MCSymbolRefExpr::VK_Mips_GPREL: + FixupKind = Mips::fixup_Mips_GPREL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: + FixupKind = Mips::fixup_Mips_CALL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT: + FixupKind = Mips::fixup_Mips_GOT16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: + FixupKind = Mips::fixup_Mips_HI16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: + FixupKind = Mips::fixup_Mips_LO16; + break; + case MCSymbolRefExpr::VK_Mips_TLSGD: + FixupKind = Mips::fixup_Mips_TLSGD; + break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + FixupKind = Mips::fixup_Mips_GOTTPREL; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + FixupKind = Mips::fixup_Mips_TPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: + FixupKind = Mips::fixup_Mips_TPREL_LO; + break; + default: + return 0; + } // switch + Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); + } // if SymbolRef + // All of the information is in the fixup. + return 0; + } + llvm_unreachable("Unable to encode MCOperand!"); + // Not reached + return 0; +} + +/// getMemEncoding - Return binary encoding of memory related operand. +/// If the offset operand requires relocation, record the relocation. +unsigned +MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. 
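  // Example with assumed operands: for "lw $t1, 8($sp)" the base register is
  // $sp (number 29) and the offset is 8, so this returns
  // (29 << 16) | (8 & 0xffff) == 0x001d0008; the opcode and rt fields come
  // from the TableGen'erated getBinaryCodeForInstr, which folds this value
  // into the rest of the instruction word.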
+ assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups); + + return (OffBits & 0xFFFF) | RegBits; +} + +unsigned +MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + // FIXME: implement + return 0; +} + +unsigned +MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + // FIXME: implement + return 0; +} + +#include "MipsGenMCCodeEmitter.inc" + diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 1f9e3dd..e6040e4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MipsMCTargetDesc.h" #include "MipsMCAsmInfo.h" +#include "MipsMCTargetDesc.h" #include "InstPrinter/MipsInstPrinter.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCCodeGenInfo.h" @@ -140,6 +140,9 @@ extern "C" void LLVMInitializeMipsTargetMC() { TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend); + TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, createMipsMCCodeEmitter); + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, createMipsMCSubtargetInfo); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 7a0042a..fc43d2d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -16,12 +16,14 @@ namespace llvm { class MCAsmBackend; -class MCInstrInfo; class MCCodeEmitter; class MCContext; +class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; class StringRef; class Target; +class raw_ostream; extern Target TheMipsTarget; extern Target TheMipselTarget; @@ -33,6 +35,7 @@ MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for Mips registers. This defines a mapping from diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index d72693c..94f7c18 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -15,7 +15,7 @@ TARGET = Mips BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtargetInfo.inc + MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 9a769e8..3c97241 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -51,12 +51,58 @@ class shift_rotate_imm64_32<bits<6> func, bits<5> isRotate, string instr_asm, shift_rotate_imm<func, isRotate, instr_asm, OpNode, imm32_63, shamt, CPU64Regs>; +// Jump and Link (Call) +let isCall=1, hasDelaySlot=1, + // All calls clobber the non-callee saved registers... 
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { + class JumpLink64<bits<6> op, string instr_asm>: + FJ<op, (outs), (ins calltarget64:$target, variable_ops), + !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], + IIBranch>; + + class JumpLinkReg64<bits<6> op, bits<6> func, string instr_asm>: + FR<op, func, (outs), (ins CPU64Regs:$rs, variable_ops), + !strconcat(instr_asm, "\t$rs"), + [(MipsJmpLink CPU64Regs:$rs)], IIBranch> { + let rt = 0; + let rd = 31; + let shamt = 0; + } + + class BranchLink64<string instr_asm>: + FI<0x1, (outs), (ins CPU64Regs:$rs, brtarget:$imm16, variable_ops), + !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>; +} + // Mul, Div class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>: Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>; class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>; +multiclass Atomic2Ops64<PatFrag Op, string Opstr> { + def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>; + def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]>; +} + +multiclass AtomicCmpSwap64<PatFrag Op, string Width> { + def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>; + def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>, + Requires<[IsN64]>; +} + +let usesCustomInserter = 1, Predicates = [HasMips64] in { + defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">; + defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">; + defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">; + defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">; + defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">; + defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">; + defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">; + defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">; +} + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -122,7 +168,15 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>; defm ULD : LoadM64<0x37, "uld", load_u, 1>; defm USD : StoreM64<0x3f, "usd", store_u, 1>; +/// Load-linked, Store-conditional +def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>; +def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>; +def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; +def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; + /// Jump and Branch Instructions +def JAL64 : JumpLink64<0x03, "jal">; +def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; @@ -145,6 +199,12 @@ def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>; def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>; def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; +def LEA_ADDiu64 : EffectiveAddress<"addiu\t$rt, $addr", CPU64Regs, mem_ea_64>; + +let Uses = [SP_64] in +def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, + Requires<[IsN64]>; + 
//===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -155,11 +215,20 @@ def : Pat<(i64 immSExt16:$in), def : Pat<(i64 immZExt16:$in), (ORi64 ZERO_64, imm:$in)>; -// zextloadi32_u -def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>, - Requires<[IsN64]>; -def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>, - Requires<[NotN64]>; +// Arbitrary immediates +def : Pat<(i64 imm:$imm), + (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + +// extended loads +let Predicates = [NotN64] in { + def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64 addr:$a), 0), 0)>; + def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>; +} +let Predicates = [IsN64] in { + def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64_P8 addr:$a), 0), 0)>; + def : Pat<(zextloadi32_u addr:$a), + (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>; +} // hi/lo relocs def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; @@ -174,6 +243,9 @@ defm : SetgtPats<CPU64Regs, SLT64, SLTu64>; defm : SetgePats<CPU64Regs, SLT64, SLTu64>; defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>; +// select MipsDynAlloc +def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>; + // truncate def : Pat<(i32 (trunc CPU64Regs:$src)), (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index d7b7f06..186a5e3 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "MipsMCInstLower.h" #include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 0ae4ef6..3d973ce 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -35,8 +35,9 @@ def RetCC_MipsO32 : CallingConv<[ //===----------------------------------------------------------------------===// def CC_MipsN : CallingConv<[ - // FIXME: Handle byval, complex and float double parameters. - + // Handles byval parameters. + CCIfByVal<CCCustom<"CC_Mips64Byval">>, + // Promote i8/i16/i32 arguments to i64. CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, @@ -63,6 +64,25 @@ def CC_MipsN : CallingConv<[ CCIfType<[f32], CCAssignToStack<4, 8>> ]>; +// N32/64 variable arguments. +// All arguments are passed in integer registers. +def CC_MipsN_VarArg : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCCustom<"CC_Mips64Byval">>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[f32], CCAssignToStack<4, 8>> +]>; + def RetCC_MipsN : CallingConv<[ // FIXME: Handle complex and float double return values. 
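The two calling conventions added above (CC_MipsN and CC_MipsN_VarArg) hand byval aggregates to the custom handler CC_Mips64Byval, which is defined later in this patch in MipsISelLowering.cpp. Below is a minimal, self-contained C++ sketch of the register/stack split that handler (together with PassByValArg64) implements; it is an illustration of the rules visible in the patch, not the in-tree code, and every name in it is invented for the example.

#include <algorithm>
#include <cstdio>

// Split rule for an N32/N64 byval aggregate, as implemented by the patch:
//  - round the aggregate size up to a multiple of 8 bytes,
//  - pass as much as possible in the eight 64-bit integer argument registers
//    (A0_64..A3_64, T0_64..T3_64),
//  - a 16-byte-aligned aggregate must start at an even register index,
//  - whatever does not fit in registers goes to 8-byte stack slots.
struct ByValSplit {
  unsigned FirstReg;   // index of the first argument register used (8 = none)
  unsigned NumRegs;    // number of 64-bit argument registers consumed
  unsigned StackBytes; // bytes that spill to the stack
};

static ByValSplit splitByVal(unsigned SizeInBytes, unsigned AlignInBytes,
                             unsigned FirstFreeReg) {
  unsigned Size = (SizeInBytes + 7) / 8 * 8;  // round up to doublewords
  unsigned Idx = FirstFreeReg;
  if (AlignInBytes == 16 && (Idx % 2))        // skip to an even register
    ++Idx;
  unsigned NumRegs = std::min(Size / 8, 8 - Idx);
  return {Idx, NumRegs, Size - NumRegs * 8};
}

int main() {
  // A 20-byte, 16-byte-aligned aggregate with one argument register already
  // taken: the handler skips to an even register index and the rounded-up
  // 24 bytes then fit entirely in three registers.
  ByValSplit S = splitByVal(20, 16, 1);
  std::printf("first reg index %u, regs used %u, stack bytes %u\n",
              S.FirstReg, S.NumRegs, S.StackBytes);
  return 0;
}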
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index dc4ecd6..a8f29ae 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -18,18 +18,20 @@ #include "MipsRelocations.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/PassManager.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -37,8 +39,6 @@ #include <iomanip> #endif -#include "llvm/CodeGen/MachineOperand.h" - using namespace llvm; STATISTIC(NumEmitted, "Number of machine instructions emitted"); @@ -66,9 +66,9 @@ class MipsCodeEmitter : public MachineFunctionPass { public: MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const MipsInstrInfo *) tm.getInstrInfo()), - TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) { + II((const MipsInstrInfo *) tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) { } bool runOnMachineFunction(MachineFunction &MF); @@ -91,7 +91,7 @@ class MipsCodeEmitter : public MachineFunctionPass { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const; + bool MayNeedFarStub) const; void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; @@ -105,6 +105,9 @@ class MipsCodeEmitter : public MachineFunctionPass { unsigned getRelocation(const MachineInstr &MI, const MachineOperand &MO) const; + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -165,23 +168,34 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, return Mips::reloc_mips_lo; } +unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + // FIXME: implement + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + // FIXME: implement + return 0; +} + unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. 
assert(MI.getOperand(OpNo).isReg()); unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16; - return - (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; + return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; } unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // size is encoded as size-1. return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; } unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // size is encoded as pos+size-1. return getMachineOpValue(MI, MI.getOperand(OpNo-1)) + getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; @@ -190,7 +204,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { + const MachineOperand &MO) const { if (MO.isReg()) return MipsRegisterInfo::getRegisterNumbering(MO.getReg()); else if (MO.isImm()) @@ -217,9 +231,10 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, } void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const { + bool MayNeedFarStub) const { MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), 0, MayNeedFarStub)); + const_cast<GlobalValue *>(GV), 0, + MayNeedFarStub)); } void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV, @@ -248,7 +263,7 @@ emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { } void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc) const { + unsigned Reloc) const { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, BB)); } @@ -395,7 +410,7 @@ void MipsCodeEmitter::emitWordLE(unsigned Word) { /// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips /// code to the specified MCE object. FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, - JITCodeEmitter &JCE) { + JITCodeEmitter &JCE) { return new MipsCodeEmitter(TM, JCE); } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 71f3116..19bb1a5 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -14,6 +14,7 @@ #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -149,6 +150,11 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned NewReg = 0; int NewImm = 0; bool ATUsed; + unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP; + unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; // First, compute final stack size. unsigned RegSize = STI.isGP32bit() ? 
4 : 8; @@ -157,7 +163,6 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : MipsFI->getMaxCallFrameSize(); unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) + - AlignOffset(MipsFI->getRegSaveAreaSize(), StackAlign) + AlignOffset(MFI->getStackSize(), StackAlign); // Update stack size @@ -165,10 +170,25 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - // TODO: check need from GP here. + // Emit instructions that set $gp using the the value of $t9. + // O32 uses the directive .cpload while N32/64 requires three instructions to + // do this. + // TODO: Do not emit these instructions if no instructions use $gp. if (isPIC && STI.isABI_O32()) BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) .addReg(RegInfo->getPICCallReg()); + else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) { + // lui $28,%hi(%neg(%gp_rel(fname))) + // addu $28,$28,$25 + // addiu $28,$28,%lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, MBBI, dl, TII.get(LUi), GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), GP).addReg(GP).addReg(T9); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), GP).addReg(GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + } + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // No need to allocate space on the stack. diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 50aa78f..b595f03 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -395,7 +396,8 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && SelectMadd(N, &DAG)) + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + SelectMadd(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -407,7 +409,8 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && SelectMsub(N, &DAG)) + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + SelectMsub(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -794,60 +797,108 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, assert(false && "Unexpected instr type to insert"); return NULL; case Mips::ATOMIC_LOAD_ADD_I8: + case Mips::ATOMIC_LOAD_ADD_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I16: + case Mips::ATOMIC_LOAD_ADD_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I32: + case Mips::ATOMIC_LOAD_ADD_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::ADDu); + case Mips::ATOMIC_LOAD_ADD_I64: + case Mips::ATOMIC_LOAD_ADD_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::DADDu); case Mips::ATOMIC_LOAD_AND_I8: + case Mips::ATOMIC_LOAD_AND_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND); case Mips::ATOMIC_LOAD_AND_I16: + case Mips::ATOMIC_LOAD_AND_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND); case Mips::ATOMIC_LOAD_AND_I32: + case Mips::ATOMIC_LOAD_AND_I32_P8: return 
EmitAtomicBinary(MI, BB, 4, Mips::AND); + case Mips::ATOMIC_LOAD_AND_I64: + case Mips::ATOMIC_LOAD_AND_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: + case Mips::ATOMIC_LOAD_OR_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR); case Mips::ATOMIC_LOAD_OR_I16: + case Mips::ATOMIC_LOAD_OR_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR); case Mips::ATOMIC_LOAD_OR_I32: + case Mips::ATOMIC_LOAD_OR_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::OR); + case Mips::ATOMIC_LOAD_OR_I64: + case Mips::ATOMIC_LOAD_OR_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::OR64); case Mips::ATOMIC_LOAD_XOR_I8: + case Mips::ATOMIC_LOAD_XOR_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I16: + case Mips::ATOMIC_LOAD_XOR_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I32: + case Mips::ATOMIC_LOAD_XOR_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::XOR); + case Mips::ATOMIC_LOAD_XOR_I64: + case Mips::ATOMIC_LOAD_XOR_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::XOR64); case Mips::ATOMIC_LOAD_NAND_I8: + case Mips::ATOMIC_LOAD_NAND_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, 0, true); case Mips::ATOMIC_LOAD_NAND_I16: + case Mips::ATOMIC_LOAD_NAND_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, 0, true); case Mips::ATOMIC_LOAD_NAND_I32: + case Mips::ATOMIC_LOAD_NAND_I32_P8: return EmitAtomicBinary(MI, BB, 4, 0, true); + case Mips::ATOMIC_LOAD_NAND_I64: + case Mips::ATOMIC_LOAD_NAND_I64_P8: + return EmitAtomicBinary(MI, BB, 8, 0, true); case Mips::ATOMIC_LOAD_SUB_I8: + case Mips::ATOMIC_LOAD_SUB_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I16: + case Mips::ATOMIC_LOAD_SUB_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I32: + case Mips::ATOMIC_LOAD_SUB_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::SUBu); + case Mips::ATOMIC_LOAD_SUB_I64: + case Mips::ATOMIC_LOAD_SUB_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu); case Mips::ATOMIC_SWAP_I8: + case Mips::ATOMIC_SWAP_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, 0); case Mips::ATOMIC_SWAP_I16: + case Mips::ATOMIC_SWAP_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, 0); case Mips::ATOMIC_SWAP_I32: + case Mips::ATOMIC_SWAP_I32_P8: return EmitAtomicBinary(MI, BB, 4, 0); + case Mips::ATOMIC_SWAP_I64: + case Mips::ATOMIC_SWAP_I64_P8: + return EmitAtomicBinary(MI, BB, 8, 0); case Mips::ATOMIC_CMP_SWAP_I8: + case Mips::ATOMIC_CMP_SWAP_I8_P8: return EmitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: + case Mips::ATOMIC_CMP_SWAP_I16_P8: return EmitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: + case Mips::ATOMIC_CMP_SWAP_I32_P8: return EmitAtomicCmpSwap(MI, BB, 4); + case Mips::ATOMIC_CMP_SWAP_I64: + case Mips::ATOMIC_CMP_SWAP_I64_P8: + return EmitAtomicCmpSwap(MI, BB, 8); } } @@ -857,13 +908,31 @@ MachineBasicBlock * MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { - assert(Size == 4 && "Unsupported size for EmitAtomicBinary."); + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = 
getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL, SC, AND, NOR, ZERO, BEQ; + + if (Size == 4) { + LL = IsN64 ? Mips::LL_P8 : Mips::LL; + SC = IsN64 ? Mips::SC_P8 : Mips::SC; + AND = Mips::AND; + NOR = Mips::NOR; + ZERO = Mips::ZERO; + BEQ = Mips::BEQ; + } + else { + LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; + SC = IsN64 ? Mips::SCD_P8 : Mips::SCD; + AND = Mips::AND64; + NOR = Mips::NOR64; + ZERO = Mips::ZERO_64; + BEQ = Mips::BEQ64; + } unsigned OldVal = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -901,23 +970,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // sc success, storeval, 0(ptr) // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { // and andres, oldval, incr // nor storeval, $0, andres - BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal) - .addReg(Mips::ZERO).addReg(AndRes); + BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); } else if (BinOpcode) { // <binop> storeval, oldval, incr BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { StoreVal = Incr; } - BuildMI(BB, dl, TII->get(Mips::SC), Success) - .addReg(StoreVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. @@ -937,6 +1003,8 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; + unsigned SC = IsN64 ? 
Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1028,7 +1096,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // beq success,$0,loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { // and andres, oldval, incr2 // nor binopres, $0, andres @@ -1051,7 +1119,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal0).addReg(NewVal); - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); @@ -1082,13 +1150,29 @@ MachineBasicBlock * MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const { - assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap."); + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL, SC, ZERO, BNE, BEQ; + + if (Size == 4) { + LL = IsN64 ? Mips::LL_P8 : Mips::LL; + SC = IsN64 ? Mips::SC_P8 : Mips::SC; + ZERO = Mips::ZERO; + BNE = Mips::BNE; + BEQ = Mips::BEQ; + } + else { + LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; + SC = IsN64 ? Mips::SCD_P8 : Mips::SCD; + ZERO = Mips::ZERO_64; + BNE = Mips::BNE64; + BEQ = Mips::BEQ64; + } unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1127,18 +1211,18 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ll dest, 0(ptr) // bne dest, oldval, exitMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BNE)) + BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(BNE)) .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: // sc success, newval, 0(ptr) // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(NewVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(BEQ)) + .addReg(Success).addReg(ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. @@ -1157,6 +1241,8 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; + unsigned SC = IsN64 ? 
Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1247,7 +1333,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // and maskedoldval0,oldval,mask // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) @@ -1263,7 +1349,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); @@ -1295,6 +1381,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP; assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() && @@ -1306,20 +1393,19 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32); + SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); // Subtract the dynamic size from the actual stack size to // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); + SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. - Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub, - SDValue()); + Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); // This node always has two return values: a new stack pointer // value and a chain - SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other); + SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; @@ -1658,7 +1744,8 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + IsN64 ? Mips::FP_64 : Mips::FP, VT); return FrameAddr; } @@ -1685,8 +1772,6 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, // Calling Convention Implementation //===----------------------------------------------------------------------===// -#include "MipsGenCallingConv.inc" - //===----------------------------------------------------------------------===// // TODO: Implement a generic logic using tblgen that can support this. 
// Mips O32 ABI rules: @@ -1793,6 +1878,70 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, return false; // CC must always match } +static const unsigned Mips64IntRegs[8] = + {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, + Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; +static const unsigned Mips64DPRegs[8] = + {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, + Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; + +static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8); + unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8; + unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8); + + assert(Align <= 16 && "Cannot handle alignments larger than 16."); + + // If byval is 16-byte aligned, the first arg register must be even. + if ((Align == 16) && (FirstIdx % 2)) { + State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]); + ++FirstIdx; + } + + // Mark the registers allocated. + for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I) + State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]); + + // Allocate space on caller's stack. + unsigned Offset = State.AllocateStack(Size, Align); + + if (FirstIdx < 8) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx], + LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + + return true; +} + +#include "MipsGenCallingConv.inc" + +static void +AnalyzeMips64CallOperands(CCState CCInfo, + const SmallVectorImpl<ISD::OutputArg> &Outs) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + bool R; + + if (Outs[i].IsFixed) + R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + + if (R) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1901,6 +2050,90 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, MachinePointerInfo(0), MachinePointerInfo(0)); } +// Copy Mips64 byVal arg to registers and stack. +void static +PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, + SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, + SmallVector<SDValue, 8>& MemOpChains, int& LastFI, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + EVT PtrTy, bool isLittle) { + unsigned ByValSize = Flags.getByValSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); + bool IsRegLoc = VA.isRegLoc(); + unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. + unsigned LocMemOffset = 0; + + if (!IsRegLoc) + LocMemOffset = VA.getLocMemOffset(); + else { + const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, + VA.getLocReg()); + const unsigned *RegEnd = Mips64IntRegs + 8; + + // Copy double words to registers. 
+ for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr, + MachinePointerInfo(), false, false, false, + Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); + } + + // If there is an argument register available, copy the remainder of the + // byval argument with sub-doubleword loads and shifts. + if ((Reg != RegEnd) && (ByValSize != Offset)) { + assert((ByValSize < Offset + 8) && + "Size of the remainder should be smaller than 8-byte."); + SDValue Val; + for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) { + unsigned RemSize = ByValSize - Offset; + + if (RemSize < LoadSize) + continue; + + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = + DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr, + MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), + false, false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // Offset in number of bits from double word boundary. + unsigned OffsetDW = (Offset % 8) * 8; + unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8); + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal, + DAG.getConstant(Shamt, MVT::i32)); + + Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) : + Shift; + Offset += LoadSize; + Alignment = std::min(Alignment, LoadSize); + } + + RegsToPass.push_back(std::make_pair(*Reg, Val)); + return; + } + } + + unsigned MemCpySize = ByValSize - Offset; + if (MemCpySize) { + // Create a fixed object on stack at offset LocMemOffset and copy + // remainder of byval arg to it with memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + } +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. @@ -1929,6 +2162,8 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); + else if (HasMips64) + AnalyzeMips64CallOperands(CCInfo, Outs); else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -1987,6 +2222,22 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // ByVal Arg. + if (Flags.isByVal()) { + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + if (IsO32) + WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + MFI, DAG, Arg, VA, Flags, getPointerTy(), + Subtarget->isLittle()); + else + PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + MFI, DAG, Arg, VA, Flags, getPointerTy(), + Subtarget->isLittle()); + continue; + } // Promote the value if needed. switch (VA.getLocInfo()) { @@ -2032,18 +2283,6 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Register can't get to this point... 
assert(VA.isMemLoc()); - // ByVal Arg. - ISD::ArgFlagsTy Flags = Outs[i].Flags; - if (Flags.isByVal()) { - assert(IsO32 && - "No support for ByVal args by ABIs other than O32 yet."); - assert(Flags.getByValSize() && - "ByVal args of size 0 should have been ignored by front-end."); - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, - DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); - continue; - } - // Create the frame index object for this incoming parameter LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2232,6 +2471,45 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, } } +// Create frame object on stack and copy registers used for byval passing to it. +static unsigned +CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, + std::vector<SDValue>& OutChains, SelectionDAG &DAG, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + MachineFrameInfo *MFI, bool IsRegLoc, + SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, + EVT PtrTy) { + const unsigned *Reg = Mips64IntRegs + 8; + int FOOffset; // Frame object offset from virtual frame pointer. + + if (IsRegLoc) { + Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); + FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8; + } + else + FOOffset = VA.getLocMemOffset(); + + // Create frame object. + unsigned NumRegs = (Flags.getByValSize() + 7) / 8; + unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true); + SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy); + InVals.push_back(FIN); + + // Copy arg registers. + for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs); + ++Reg, ++I) { + unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass); + SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, + DAG.getConstant(I * 8, PtrTy)); + SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), + StorePtr, MachinePointerInfo(), false, + false, 0); + OutChains.push_back(Store); + } + + return LastFI; +} + /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. 
SDValue @@ -2267,9 +2545,28 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT ValVT = VA.getValVT(); + ISD::ArgFlagsTy Flags = Ins[i].Flags; + bool IsRegLoc = VA.isRegLoc(); + + if (Flags.isByVal()) { + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + if (IsO32) { + unsigned NumWords = (Flags.getByValSize() + 3) / 4; + LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), + true); + SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); + InVals.push_back(FIN); + ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); + } else // N32/64 + LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, + MFI, IsRegLoc, InVals, MipsFI, + getPointerTy()); + continue; + } // Arguments stored on registers - if (VA.isRegLoc()) { + if (IsRegLoc) { EVT RegVT = VA.getLocVT(); unsigned ArgReg = VA.getLocReg(); TargetRegisterClass *RC = 0; @@ -2325,23 +2622,6 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // sanity check assert(VA.isMemLoc()); - ISD::ArgFlagsTy Flags = Ins[i].Flags; - - if (Flags.isByVal()) { - assert(IsO32 && - "No support for ByVal args by ABIs other than O32 yet."); - assert(Flags.getByValSize() && - "ByVal args of size 0 should have been ignored by front-end."); - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), - true); - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); - InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); - - continue; - } - // The stack pointer offset is relative to the caller stack frame. LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2367,24 +2647,40 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - if (isVarArg && IsO32) { + if (isVarArg) { + unsigned NumOfRegs = IsO32 ? 4 : 8; + const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); + int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. + TargetRegisterClass *RC + = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass; + unsigned RegSize = RC->getSize(); + int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; + + // Offset of the first variable argument from stack pointer. + int FirstVaArgOffset; + + if (IsO32 || (Idx == NumOfRegs)) { + FirstVaArgOffset = + (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize; + } else + FirstVaArgOffset = RegSlotOffset; + // Record the frame index of the first variable argument // which is a value necessary to VASTART. - unsigned NextStackOffset = CCInfo.getNextStackOffset(); - assert(NextStackOffset % 4 == 0 && - "NextStackOffset must be aligned to 4-byte boundaries."); - LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true); MipsFI->setVarArgsFrameIndex(LastFI); - // If NextStackOffset is smaller than o32's 16-byte reserved argument area, - // copy the integer registers that have not been used for argument passing - // to the caller's stack frame. 
- for (; NextStackOffset < 16; NextStackOffset += 4) { - TargetRegisterClass *RC = Mips::CPURegsRegisterClass; - unsigned Idx = NextStackOffset / 4; - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (int StackOffset = RegSlotOffset; + Idx < NumOfRegs; ++Idx, StackOffset += RegSize) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + MVT::getIntegerVT(RegSize * 8)); + LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, MachinePointerInfo(), diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 271d248..8fa3052 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -30,86 +30,6 @@ namespace Mips { unsigned GetOppositeBranchOpc(unsigned Opc); } -/// MipsII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace MipsII { - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // Mips Specific MachineOperand flags. - - MO_NO_FLAG, - - /// MO_GOT - Represents the offset into the global offset table at which - /// the address the relocation entry symbol resides during execution. - MO_GOT, - - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides - /// during execution. This is different from the above since this flag - /// can only be present in call instructions. - MO_GOT_CALL, - - /// MO_GPREL - Represents the offset from the current gp value to be used - /// for the relocatable object file being produced. - MO_GPREL, - - /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol - /// address. - MO_ABS_HI, - MO_ABS_LO, - - /// MO_TLSGD - Represents the offset into the global offset table at which - // the module ID and TSL block offset reside during execution (General - // Dynamic TLS). - MO_TLSGD, - - /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial - // Exec TLS). - MO_GOTTPREL, - - /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from - // the thread pointer (Local Exec TLS). - MO_TPREL_HI, - MO_TPREL_LO, - - // N32/64 Flags. - MO_GPOFF_HI, - MO_GPOFF_LO, - MO_GOT_DISP, - MO_GOT_PAGE, - MO_GOT_OFST - }; - - enum { - //===------------------------------------------------------------------===// - // Instruction encodings. These are the standard/most common forms for - // Mips instructions. - // - - // Pseudo - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - Pseudo = 0, - - /// FrmR - This form is for instructions of the format R. - FrmR = 1, - /// FrmI - This form is for instructions of the format I. - FrmI = 2, - /// FrmJ - This form is for instructions of the format J. 
- FrmJ = 3, - /// FrmFR - This form is for instructions of the format FR. - FrmFR = 4, - /// FrmFI - This form is for instructions of the format FI. - FrmFI = 5, - /// FrmOther - This form is for instructions that have no specific format. - FrmOther = 6, - - FormMask = 15 - }; -} - class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 1cc3841..5dca9b6 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -39,8 +39,8 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2, def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; -def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, iPTR>]>; +def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, + SDTCisSameAs<0, 1>]>; def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, @@ -138,8 +138,15 @@ def NotN64 : Predicate<"!Subtarget.isABI_N64()">; //===----------------------------------------------------------------------===// // Instruction operand types -def brtarget : Operand<OtherVT>; +def jmptarget : Operand<OtherVT> { + let EncoderMethod = "getJumpTargetOpValue"; +} +def brtarget : Operand<OtherVT> { + let EncoderMethod = "getBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; +} def calltarget : Operand<i32>; +def calltarget64: Operand<i64>; def simm16 : Operand<i32>; def simm16_64 : Operand<i64>; def shamt : Operand<i32>; @@ -167,6 +174,12 @@ def mem_ea : Operand<i32> { let EncoderMethod = "getMemEncoding"; } +def mem_ea_64 : Operand<i64> { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops CPU64Regs, simm16_64); + let EncoderMethod = "getMemEncoding"; +} + // size operand of ext instruction def size_ext : Operand<i32> { let EncoderMethod = "getSizeExtEncoding"; @@ -442,7 +455,7 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, // Unconditional branch let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFJ<bits<6> op, string instr_asm>: - FJ<op, (outs), (ins brtarget:$target), + FJ<op, (outs), (ins jmptarget:$target), !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in @@ -525,9 +538,9 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let Defs = DefRegs; } -class EffectiveAddress<string instr_asm> : - FMem<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr), - instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : + FMem<0x09, (outs RC:$rt), (ins Mem:$addr), + instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>: @@ -587,20 +600,41 @@ class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins, } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). 
-class Atomic2Ops<PatFrag Op, string Opstr> : - MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), +class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, + RegisterClass PRC> : + MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set CPURegs:$dst, - (Op CPURegs:$ptr, CPURegs:$incr))]>; + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + +multiclass Atomic2Ops32<PatFrag Op, string Opstr> { + def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>; + def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]>; +} // Atomic Compare & Swap. -class AtomicCmpSwap<PatFrag Op, string Width> : - MipsPseudo<(outs CPURegs:$dst), - (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap), - !strconcat("atomic_cmp_swap_", Width, - "\t$dst, $ptr, $cmp, $swap"), - [(set CPURegs:$dst, - (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>; +class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, + RegisterClass PRC> : + MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + +multiclass AtomicCmpSwap32<PatFrag Op, string Width> { + def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>; + def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]>; +} + +class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : + FMem<Opc, (outs RC:$rt), (ins Mem:$addr), + !strconcat(opstring, "\t$rt, $addr"), [], IILoad> { + let mayLoad = 1; +} + +class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : + FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr), + !strconcat(opstring, "\t$rt, $addr"), [], IIStore> { + let mayStore = 1; + let Constraints = "$rt = $dst"; +} //===----------------------------------------------------------------------===// // Pseudo instructions @@ -636,32 +670,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { - def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">; - def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">; - def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">; - def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">; - def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">; - def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">; - def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">; - def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">; - def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">; - def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">; - def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">; - def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">; - def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">; - def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">; - def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">; - def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">; - def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">; - def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">; - - def ATOMIC_SWAP_I8 : 
Atomic2Ops<atomic_swap_8, "swap_8">; - def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">; - def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">; - - def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">; - def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">; - def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">; + defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; + defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">; + defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">; + defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">; + defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">; + defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">; + defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">; + defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">; + defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">; + defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">; + defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">; + defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">; + defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">; + defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">; + defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">; + defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">; + defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">; + defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">; + + defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">; + defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">; + defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">; + + defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">; + defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">; + defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">; } //===----------------------------------------------------------------------===// @@ -738,12 +772,10 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", } /// Load-linked, Store-conditional -let mayLoad = 1 in - def LL : FMem<0x30, (outs CPURegs:$rt), (ins mem:$addr), - "ll\t$rt, $addr", [], IILoad>; -let mayStore = 1, Constraints = "$rt = $dst" in - def SC : FMem<0x38, (outs CPURegs:$dst), (ins CPURegs:$rt, mem:$addr), - "sc\t$rt, $addr", [], IIStore>; +def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>; +def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>; +def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>; +def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; @@ -798,13 +830,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">; +def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. 
let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">; +def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 6c0e4f6..1fab52c 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -15,6 +15,7 @@ #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index be27606..bc30b6b 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -51,16 +51,12 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; - // Size of area on callee's stack frame which is used to save va_arg or - // byval arguments passed in registers. - unsigned RegSaveAreaSize; - public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), RegSaveAreaSize(0) + MaxCallFrameSize(0) {} bool isInArgFI(int FI) const { @@ -104,11 +100,6 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } - - unsigned getRegSaveAreaSize() const { return RegSaveAreaSize; } - void setRegSaveAreaSize(unsigned S) { - if (RegSaveAreaSize < S) RegSaveAreaSize = S; - } }; } // end of namespace llvm diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt index e8035af..90ae260 100644 --- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MipsInfo parent = Mips -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = Mips diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 46a458c..6709c1b 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -36,6 +36,7 @@ add_llvm_library_dependencies(LLVMPTXCodeGen LLVMSelectionDAG LLVMSupport LLVMTarget + LLVMTransformUtils ) add_subdirectory(TargetInfo) diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index aabb404..2f6c92d 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -96,9 +96,23 @@ void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { O << "), "; } - O << *(MI->getOperand(Index++).getExpr()) << ", ("; - + const MCExpr* Expr = MI->getOperand(Index++).getExpr(); unsigned NumArgs = MI->getOperand(Index++).getImm(); + + // if the function call is to printf or puts, change to vprintf + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + if (Sym.getName() == "printf" || Sym.getName() == "puts") { + O << "vprintf"; + } else { + O << Sym.getName(); + } + } else { + O << *Expr; + } + + O << ", ("; + if (NumArgs > 0) { printOperand(MI, Index++, O); for (unsigned i = 1; i < NumArgs; ++i) { diff --git a/lib/Target/PTX/LLVMBuild.txt 
b/lib/Target/PTX/LLVMBuild.txt index 27807e6..22c70de 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -25,6 +25,6 @@ has_asmprinter = 1 type = Library name = PTXCodeGen parent = PTX -required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo Scalar SelectionDAG Support Target TransformUtils +required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt index 811ef4b..94dbcee 100644 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -5,8 +5,8 @@ add_llvm_library(LLVMPTXDesc add_llvm_library_dependencies(LLVMPTXDesc LLVMMC - LLVMPTXInfo LLVMPTXAsmPrinter + LLVMPTXInfo LLVMSupport ) diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 45a6afc..bdf238b 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -165,6 +165,11 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) OutStreamer.AddBlankLine(); + // declare external functions + for (Module::const_iterator i = M.begin(), e = M.end(); + i != e; ++i) + EmitFunctionDeclaration(i); + // declare global variables for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); i != e; ++i) @@ -454,6 +459,31 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText(os.str()); } +void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func) +{ + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + std::string decl = ""; + + // hard-coded emission of extern vprintf function + + if (func->getName() == "printf" || func->getName() == "puts") { + decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_2, .param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_3)\n"; + } + + OutStreamer.EmitRawText(Twine(decl)); +} + unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h index 538c080..d5ea4db 100644 --- a/lib/Target/PTX/PTXAsmPrinter.h +++ b/lib/Target/PTX/PTXAsmPrinter.h @@ -47,7 +47,7 @@ public: private: void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(); + void EmitFunctionDeclaration(const Function* func); StringMap<unsigned> SourceIdMap; }; // class PTXAsmPrinter diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index 3307d91..17191fb 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -46,6 +47,11 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
setMinFunctionAlignment(2); + // Let LLVM use loads/stores for all mem* operations + maxStoresPerMemcpy = 4096; + maxStoresPerMemmove = 4096; + maxStoresPerMemset = 4096; + //////////////////////////////////// /////////// Expansion ////////////// //////////////////////////////////// @@ -352,40 +358,101 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVectorImpl<SDValue> &InVals) const { MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - PTXParamManager &PM = MFI->getParamManager(); - + PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = PTXMFI->getParamManager(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && "Calls are not handled for the target device"); + // Identify the callee function + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + const Function *function = cast<Function>(GV); + + // allow non-device calls only for printf + bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; + + assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && + "PTX function calls must be to PTX device functions"); + + unsigned outSize = isPrintf ? 2 : Outs.size(); + std::vector<SDValue> Ops; // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] - Ops.resize(Outs.size() + Ins.size() + 4); + Ops.resize(outSize + Ins.size() + 4); Ops[0] = Chain; // Identify the callee function - const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); - assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device && - "PTX function calls must be to PTX device functions"); Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); Ops[Ins.size()+2] = Callee; - // Generate STORE_PARAM nodes for each function argument. In PTX, function - // arguments are explicitly stored into .param variables and passed as - // arguments. There is no register/stack-based calling convention in PTX. 
- Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32); - for (unsigned i = 0; i != OutVals.size(); ++i) { - unsigned Size = OutVals[i].getValueType().getSizeInBits(); - unsigned Param = PM.addLocalParam(Size); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); + // #Outs + Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); + + if (isPrintf) { + // first argument is the address of the global string variable in memory + unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); + SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(), + MVT::Other); Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue, OutVals[i]); - Ops[i+Ins.size()+4] = ParamValue; - } + ParamValue0, OutVals[0]); + Ops[Ins.size()+4] = ParamValue0; + + // alignment is the maximum size of all the arguments + unsigned alignment = 0; + for (unsigned i = 1; i < OutVals.size(); ++i) { + alignment = std::max(alignment, + OutVals[i].getValueType().getSizeInBits()); + } + + // size is the alignment multiplied by the number of arguments + unsigned size = alignment * (OutVals.size() - 1); + + // second argument is the address of the stack object (unless no arguments) + unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); + SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), + MVT::Other); + Ops[Ins.size()+5] = ParamValue1; + + if (size > 0) + { + // create a local stack object to store the arguments + unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); + SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); + + // store each of the arguments to the stack in turn + for (unsigned int i = 1; i != OutVals.size(); i++) { + SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); + Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr, + MachinePointerInfo(), + false, false, 0); + } + // copy the address of the local frame index to get the address in non-local space + SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex); + + // store this address in the second argument + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr); + } + } + else + { + // Generate STORE_PARAM nodes for each function argument. In PTX, function + // arguments are explicitly stored into .param variables and passed as + // arguments. There is no register/stack-based calling convention in PTX. + for (unsigned i = 0; i != OutVals.size(); ++i) { + unsigned Size = OutVals[i].getValueType().getSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[i]); + Ops[i+Ins.size()+4] = ParamValue; + } + } + std::vector<SDValue> InParams; // Generate list of .param variables to hold the return value(s). 
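As a rough illustration of the argument-buffer sizing used in the printf lowering above — the widest vararg sets the alignment, and the buffer reserves one slot of that width for each operand after the format string — here is a minimal standalone sketch. The helper name computeVarArgBuffer and the example operand widths are hypothetical and are not part of this patch; they only mirror the arithmetic in PTXTargetLowering::LowerCall.

// Hypothetical sketch (not part of the patch) of the vararg-buffer sizing in
// PTXTargetLowering::LowerCall: the widest vararg determines the alignment,
// and one slot of that width is reserved per operand after the format string.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

struct BufferLayout {
  unsigned SizeBytes;   // bytes reserved for the local stack object
  unsigned AlignBytes;  // alignment of the stack object
};

static BufferLayout computeVarArgBuffer(const std::vector<unsigned> &ArgBits) {
  // ArgBits[0] is the format-string pointer; the remaining entries are the
  // vararg operand widths in bits, mirroring OutVals in LowerCall.
  unsigned AlignmentBits = 0;
  for (std::size_t I = 1; I < ArgBits.size(); ++I)
    AlignmentBits = std::max(AlignmentBits, ArgBits[I]);
  unsigned SizeBits = AlignmentBits * (unsigned)(ArgBits.size() - 1);
  return {SizeBits / 8, AlignmentBits / 8};
}

int main() {
  // e.g. printf("%d %lld\n", i32, i64) on a 64-bit target:
  // pointer (64) + i32 (32) + i64 (64) -> 16-byte buffer, 8-byte aligned.
  BufferLayout L = computeVarArgBuffer({64, 32, 64});
  std::printf("size=%u bytes, align=%u bytes\n", L.SizeBytes, L.AlignBytes);
  return 0;
}

With no varargs the computed size is zero and no stack object is created, which corresponds to the "if (size > 0)" guard in the lowering code above.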
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index fbddac5..bcd5bcf 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -680,6 +680,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVaddr64 : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVframe32 + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>; + def MOVframe64 + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>; } // PTX cvt instructions diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt index f35c237..8e5285a 100644 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = PTXInfo parent = PTX -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = PTX diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index 1f5d3e7..f51b417 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = PowerPCInfo parent = PowerPC -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = PowerPC diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt index 22f4e1f..81c9032 100644 --- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = SparcInfo parent = Sparc -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = Sparc diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b590199..4542d4b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -62,6 +62,8 @@ add_llvm_library_dependencies(LLVMX86CodeGen LLVMTarget LLVMX86AsmPrinter LLVMX86Desc + LLVMX86Info + LLVMX86Utils ) add_subdirectory(AsmParser) diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 8721912..264e791 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -10,7 +10,6 @@ add_llvm_library_dependencies(LLVMX86Desc LLVMMC LLVMSupport LLVMX86AsmPrinter - LLVMX86AsmPrinter LLVMX86Info ) diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt index 6b2635b..ee015bd 100644 --- a/lib/Target/X86/TargetInfo/LLVMBuild.txt +++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = X86Info parent = X86 -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = X86 diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 93f7de8..4e11131 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -924,10 +924,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); - // Can turn SHL into an integer multiply. 
- setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v16i8, Custom); - setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); @@ -948,25 +944,41 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + // FIXME: these should be Legal but thats only for the case where + // the index is constant. For now custom expand to deal with that if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } } if (Subtarget->hasXMMInt()) { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRL, MVT::v16i8, Custom); setOperationAction(ISD::SRL, MVT::v8i16, Custom); + setOperationAction(ISD::SRL, MVT::v16i8, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v8i16, Custom); + setOperationAction(ISD::SHL, MVT::v16i8, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); + + if (Subtarget->hasAVX2()) { + setOperationAction(ISD::SRL, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v4i32, Legal); + + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SHL, MVT::v4i32, Legal); + + setOperationAction(ISD::SRA, MVT::v4i32, Legal); + } else { + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); + + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); + + setOperationAction(ISD::SRA, MVT::v4i32, Custom); + } } if (Subtarget->hasSSE42() || Subtarget->hasAVX()) @@ -1009,18 +1021,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); setOperationAction(ISD::SHL, MVT::v16i16, Custom); setOperationAction(ISD::SHL, MVT::v32i8, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); setOperationAction(ISD::SRA, MVT::v16i16, Custom); + setOperationAction(ISD::SRA, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); @@ -1050,21 +1058,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); + // Don't lower v32i8 because there is no 128-bit byte mul setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - 
setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - // Don't lower v32i8 because there is no 128-bit byte mul + setOperationAction(ISD::SRL, MVT::v4i64, Legal); + setOperationAction(ISD::SRL, MVT::v8i32, Legal); + + setOperationAction(ISD::SHL, MVT::v4i64, Legal); + setOperationAction(ISD::SHL, MVT::v8i32, Legal); + + setOperationAction(ISD::SRA, MVT::v8i32, Legal); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -1080,6 +1084,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + + setOperationAction(ISD::SRA, MVT::v8i32, Custom); } // Custom lower several nodes for 256-bit types. @@ -6613,7 +6625,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); - bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; @@ -6622,9 +6633,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); - // Shuffle operations on MMX not supported. - if (isMMX) - return Op; + assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); // Vector shuffle lowering takes 3 steps: // @@ -6636,7 +6645,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // so the shuffle can be broken into other shuffles and the legalizer can // try the lowering again. // - // The general ideia is that no vector_shuffle operation should be left to + // The general idea is that no vector_shuffle operation should be left to // be matched during isel, all of them must be converted to a target specific // node here. @@ -6956,8 +6965,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, Op.getOperand(0)), Op.getOperand(1)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); - } else if (VT == MVT::i32) { - // ExtractPS works with constant index. + } else if (VT == MVT::i32 || VT == MVT::i64) { + // ExtractPS/pextrq works with constant index. if (isa<ConstantSDNode>(Op.getOperand(1))) return Op; } @@ -7096,7 +7105,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) { + } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) && + isa<ConstantSDNode>(N2)) { // PINSR* works with constant index. 
return Op; } @@ -9522,6 +9532,14 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const // Fix vector shift instructions where the last operand is a non-immediate // i32 value. + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: @@ -9569,6 +9587,30 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_sse2_psrai_d: NewIntNo = Intrinsic::x86_sse2_psra_d; break; + case Intrinsic::x86_avx2_pslli_w: + NewIntNo = Intrinsic::x86_avx2_psll_w; + break; + case Intrinsic::x86_avx2_pslli_d: + NewIntNo = Intrinsic::x86_avx2_psll_d; + break; + case Intrinsic::x86_avx2_pslli_q: + NewIntNo = Intrinsic::x86_avx2_psll_q; + break; + case Intrinsic::x86_avx2_psrli_w: + NewIntNo = Intrinsic::x86_avx2_psrl_w; + break; + case Intrinsic::x86_avx2_psrli_d: + NewIntNo = Intrinsic::x86_avx2_psrl_d; + break; + case Intrinsic::x86_avx2_psrli_q: + NewIntNo = Intrinsic::x86_avx2_psrl_q; + break; + case Intrinsic::x86_avx2_psrai_w: + NewIntNo = Intrinsic::x86_avx2_psra_w; + break; + case Intrinsic::x86_avx2_psrai_d: + NewIntNo = Intrinsic::x86_avx2_psra_d; + break; default: { ShAmtVT = MVT::v2i32; switch (IntNo) { @@ -10130,47 +10172,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasXMMInt()) return SDValue(); - // Decompose 256-bit shifts into smaller 128-bit shifts. - if (VT.getSizeInBits() == 256) { - int NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); - - // Extract the two vectors - SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); - SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); - - // Recreate the shift amount vectors - SDValue Amt1, Amt2; - if (Amt.getOpcode() == ISD::BUILD_VECTOR) { - // Constant shift amount - SmallVector<SDValue, 4> Amt1Csts; - SmallVector<SDValue, 4> Amt2Csts; - for (int i = 0; i < NumElems/2; ++i) - Amt1Csts.push_back(Amt->getOperand(i)); - for (int i = NumElems/2; i < NumElems; ++i) - Amt2Csts.push_back(Amt->getOperand(i)); - - Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt1Csts[0], NumElems/2); - Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt2Csts[0], NumElems/2); - } else { - // Variable shift amount - Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); - Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); - } - - // Issue new vector shifts for the smaller types - V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); - V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); - - // Concatenate the result back - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); - } - // Optimize shl/srl/sra with constant shift amount. 
if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -10259,6 +10260,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } + + if (Subtarget->hasAVX2()) { + if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + } } } @@ -10328,6 +10371,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { R, DAG.getNode(ISD::ADD, dl, VT, R, R)); return R; } + + // Decompose 256-bit shifts into smaller 128-bit shifts. 
+ if (VT.getSizeInBits() == 256) { + int NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + // Extract the two vectors + SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + + // Recreate the shift amount vectors + SDValue Amt1, Amt2; + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + // Constant shift amount + SmallVector<SDValue, 4> Amt1Csts; + SmallVector<SDValue, 4> Amt2Csts; + for (int i = 0; i < NumElems/2; ++i) + Amt1Csts.push_back(Amt->getOperand(i)); + for (int i = NumElems/2; i < NumElems; ++i) + Amt2Csts.push_back(Amt->getOperand(i)); + + Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt1Csts[0], NumElems/2); + Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt2Csts[0], NumElems/2); + } else { + // Variable shift amount + Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); + Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + + // Issue new vector shifts for the smaller types + V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); + V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); + + // Concatenate the result back + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); + } + return SDValue(); } @@ -10951,12 +11036,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::FHADD: return "X86ISD::FHADD"; + case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; - case X86ISD::FHADD: return "X86ISD::FHADD"; - case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; @@ -10996,6 +11082,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; case X86ISD::ANDN: return "X86ISD::ANDN"; + case X86ISD::BLSI: return "X86ISD::BLSI"; + case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; + case X86ISD::BLSR: return "X86ISD::BLSR"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -13387,7 +13476,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasXMMInt()) return SDValue(); - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) + if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && + (!Subtarget->hasAVX2() || + (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) return SDValue(); SDValue ShAmtOp = N->getOperand(1); @@ -13460,6 +13551,18 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v4i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, 
VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), + ValOp, BaseShAmt); break; case ISD::SRA: if (VT == MVT::v4i32) @@ -13470,6 +13573,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), + ValOp, BaseShAmt); break; case ISD::SRL: if (VT == MVT::v2i64) @@ -13484,6 +13595,18 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v4i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), + ValOp, BaseShAmt); break; } return SDValue(); diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 102911f..9428fff 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -456,6 +456,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 }, + { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 }, + { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 }, { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, @@ -508,6 +511,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 }, + { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 }, + { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 }, { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, @@ -526,7 +532,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 }, { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 } + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, + // AVX2 foldable instructions + { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_16 }, + { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_16 }, + { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_16 }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_16 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_16 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -652,6 +665,7 @@ 
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, { X86::MINSSrr, X86::MINSSrm, 0 }, { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, { X86::MULSDrr, X86::MULSDrm, 0 }, @@ -664,30 +678,44 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, + { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 }, { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, + { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 }, + { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, + { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, + { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, @@ -695,6 +723,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 }, + { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, + { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, + { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, @@ -816,6 +848,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 }, { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, { X86::VMULSDrr, X86::VMULSDrm, 0 }, @@ 
-824,28 +857,44 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 }, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 }, { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 }, { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, + { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 }, { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, + { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 }, + { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 }, { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 }, { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, + { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, + { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, + { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, + { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, + { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 }, { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, + { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, @@ -853,6 +902,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, + { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 }, + { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 }, + { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 }, + { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 }, { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, @@ -886,7 +939,91 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, - { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 } + { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }, + // AVX2 foldable instructions + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_16 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_16 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_16 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_16 }, + { X86::VPADDBYrr, X86::VPADDBYrm, 
TB_ALIGN_16 }, + { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_16 }, + { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_16 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_16 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_16 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_16 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_16 }, + { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_16 }, + { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_16 }, + { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_16 }, + { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_16 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_16 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_16 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_16 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_16 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_16 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_16 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_16 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_16 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_16 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_16 }, + { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_16 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_16 }, + { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_16 }, + { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_16 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_16 }, + { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_16 }, + { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_16 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_16 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_16 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_16 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_16 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_16 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_16 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_16 }, + { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_16 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_16 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_16 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_16 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_16 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_16 }, + { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_16 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_16 }, + { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_16 }, + { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_16 }, + { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_16 }, + { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_16 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_16 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_16 }, + { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_16 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_16 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_16 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_16 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, 
TB_ALIGN_16 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_16 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_16 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_16 }, + { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_16 }, // FIXME: add AVX 256-bit foldable instructions }; @@ -2624,6 +2761,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, /// static bool hasPartialRegUpdate(unsigned Opcode) { switch (Opcode) { + case X86::CVTSI2SSrr: + case X86::CVTSI2SS64rr: + case X86::CVTSI2SDrr: + case X86::CVTSI2SD64rr: case X86::CVTSD2SSrr: case X86::Int_CVTSD2SSrr: case X86::CVTSS2SDrr: @@ -2652,6 +2793,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) { return false; } +/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle +/// instructions we would like before a partial register update. +unsigned X86InstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode())) + return 0; + + // If MI is marked as reading Reg, the partial register update is wanted. + const MachineOperand &MO = MI->getOperand(0); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MO.readsReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else { + if (MI->readsRegister(Reg, TRI)) + return 0; + } + + // If any of the preceding 16 instructions are reading Reg, insert a + // dependency breaking instruction. The magic number is based on a few + // Nehalem experiments. + return 16; +} + +void X86InstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + unsigned Reg = MI->getOperand(OpNum).getReg(); + if (X86::VR128RegClass.contains(Reg)) { + // These instructions are all floating point domain, so xorps is the best + // choice. + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) + .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); + } else if (X86::VR256RegClass.contains(Reg)) { + // Use vxorps to clear the full ymm register. + // It wants to read and write the xmm sub-register. 
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg) + .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + } else + return; + MI->addRegisterKilled(Reg, TRI, true); +} + MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 97009db..ee488d8 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -345,6 +345,11 @@ public: void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, unsigned OpNum, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 91c84dd..6deee4f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -80,8 +80,9 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, list<dag> pat_rr, list<dag> pat_rm, - bit Is2Addr = 1> { - let isCommutable = 1 in + bit Is2Addr = 1, + bit rr_hasSideEffects = 0> { + let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), @@ -519,6 +520,8 @@ let Predicates = [HasSSE2] in { // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. + def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), @@ -646,6 +649,9 @@ let Predicates = [HasAVX] in { // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. + def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), + sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; @@ -2629,7 +2635,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, [], [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; + (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, @@ -2926,12 +2932,15 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. 
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { + let neverHasSideEffects = 1 in { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + let mayLoad = 1 in def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, @@ -3799,14 +3808,15 @@ let ExeDomain = SSEPackedInt in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", []>; // PSRADQri doesn't exist in SSE[1-3]. - } - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + let mayLoad = 1 in + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; + } } } // Constraints = "$src1 = $dst" @@ -5348,6 +5358,7 @@ let Predicates = [HasAVX] in { //===---------------------------------------------------------------------===// multiclass ssse3_palign<string asm, bit Is2Addr = 1> { + let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, @@ -5355,6 +5366,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !if(Is2Addr, @@ -5362,19 +5374,23 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + } } multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { + let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + } } let Predicates = [HasAVX] in @@ -5721,6 +5737,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>, OpSize; + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, @@ -5743,6 +5760,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">; /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, @@ -6720,19 +6738,21 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } -let Defs = 
[XMM0, EFLAGS], Predicates = [HasAVX] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let mayLoad = 1 in def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; @@ -6756,19 +6776,21 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { } let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let mayLoad = 1 in def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; @@ -7071,12 +7093,14 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), //===----------------------------------------------------------------------===// // Carry-less Multiplication instructions +let neverHasSideEffects = 1 in { let Constraints = "$src1 = $dst" in { def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; +let mayLoad = 1 in def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -7089,10 +7113,12 @@ def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +let mayLoad = 1 in def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +} multiclass pclmul_alias<string asm, int immop> { @@ -7655,7 +7681,6 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", // Variable Bit Shifts // multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, - PatFrag pf128, PatFrag pf256, Intrinsic Int128, Intrinsic Int256> { def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -7664,7 +7689,8 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins 
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set VR128:$dst, (Int128 VR128:$src1, (pf128 addr:$src2)))]>,
+             [(set VR128:$dst,
+               (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
             VEX_4V;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
@@ -7673,26 +7699,47 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set VR256:$dst, (Int256 VR256:$src1, (pf256 addr:$src2)))]>,
+             [(set VR256:$dst,
+               (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>,
             VEX_4V;
}

-defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32,
-                              int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>;
-defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64,
-                              int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>,
-                VEX_W;
-defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32,
-                              int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>;
-defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64,
-                              int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>,
-                VEX_W;
-defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32,
-                              int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>;
+multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr,
+                              Intrinsic Int128, Intrinsic Int256> {
+  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+             (ins VR128:$src1, VR128:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
+  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+             (ins VR128:$src1, i128mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst,
+               (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
+             VEX_4V;
+  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+             (ins VR256:$src1, VR256:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+             (ins VR256:$src1, i256mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR256:$dst,
+               (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>,
+             VEX_4V;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d,
+                              int_x86_avx2_psllv_d_256>;
+defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q,
+                                  int_x86_avx2_psllv_q_256>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d,
+                              int_x86_avx2_psrlv_d_256>;
+defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q,
+                                  int_x86_avx2_psrlv_q_256>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d,
+                              int_x86_avx2_psrav_d_256>;

let Predicates = [HasAVX2] in {
-
  def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
            (VPSLLVDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
@@ -7714,29 +7761,30 @@ let Predicates = [HasAVX2] in {
  def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
            (VPSRAVDYrr VR256:$src1, VR256:$src2)>;

-  def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
-            (VPSLLVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv2i64 addr:$src2))),
+  def : Pat<(v4i32 (shl (v4i32 VR128:$src1),
+                        (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
            (VPSLLVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2i64 (shl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))),
+  def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
            (VPSLLVQrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4i32 (srl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
+  def : Pat<(v4i32 (srl (v4i32 VR128:$src1),
+                        (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
            (VPSRLVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2i64 (srl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))),
+  def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
            (VPSRLVQrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4i32 (sra (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
+  def : Pat<(v4i32 (sra (v4i32 VR128:$src1),
+                        (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
            (VPSRAVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (shl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
+  def : Pat<(v8i32 (shl (v8i32 VR256:$src1),
+                        (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
            (VPSLLVDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4i64 (shl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))),
+  def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
            (VPSLLVQYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (srl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
+  def : Pat<(v8i32 (srl (v8i32 VR256:$src1),
+                        (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
            (VPSRLVDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4i64 (srl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))),
+  def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
            (VPSRLVQYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (sra (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
+  def : Pat<(v8i32 (sra (v8i32 VR256:$src1),
+                        (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
            (VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
-
-
-
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index 7f4a433..628afb5 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -19,6 +19,6 @@
type = Library
name = XCoreDesc
parent = XCore
-required_libraries = MC Support XCoreInfo
+required_libraries = MC XCoreInfo
add_to_library_groups = XCore

diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
index 1d1b722..d0b8e54 100644
--- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -19,6 +19,6 @@
type = Library
name = XCoreInfo
parent = XCore
-required_libraries = MC Support
+required_libraries = MC Support Target
add_to_library_groups = XCore

diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4d8dbc2..8fa66fc 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_library(LLVMipo
add_llvm_library_dependencies(LLVMipo
  LLVMAnalysis
  LLVMCore
+  LLVMInstCombine
  LLVMScalarOpts
  LLVMSupport
  LLVMTarget
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index bf5d5f4..884faca 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,5 +20,5 @@
type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core InstCombine Scalar Support Target TransformUtils IPA
+required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index b89f730..c12f403 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1875,8 +1875,14 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
    return;

  // The bitcast references the original alloca.  Replace its uses with
-  // references to the first new element alloca.
-  Instruction *Val = NewElts[0];
+  // references to the alloca containing offset zero (which is normally at
+  // index zero, but might not be in cases involving structs with elements
+  // of size zero).
+  Type *T = AI->getAllocatedType();
+  uint64_t EltOffset = 0;
+  Type *IdxTy;
+  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+  Instruction *Val = NewElts[Idx];
  if (Val->getType() != BC->getDestTy()) {
    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
    Val->takeName(BC);
@@ -2160,6 +2166,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
    }

    unsigned EltSize = TD->getTypeAllocSize(EltTy);
+    if (!EltSize)
+      continue;

    IRBuilder<> Builder(MI);
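For context on the zero-sized-element case the ScalarReplAggregates change guards against: in a struct such as { [0 x i32], double }, byte offset zero is contained by the second element, so NewElts[0] would be the wrong replacement alloca, and the zero-sized element contributes no bytes for the memintrinsic rewrite to handle. A minimal standalone sketch of how the layout queries behave (LLVM 3.0-era headers; the data-layout string and the program itself are illustrative assumptions, not taken from the patch):

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  TargetData TD("e-p:64:64:64");                              // hypothetical 64-bit layout
  Type *ZeroSized = ArrayType::get(Type::getInt32Ty(Ctx), 0); // [0 x i32], alloc size 0
  Type *Fields[] = { ZeroSized, Type::getDoubleTy(Ctx) };
  StructType *ST = StructType::get(Ctx, Fields);

  // Offset 0 is contained by the double at index 1, not the zero-sized array
  // at index 0 -- the situation RewriteBitCast now resolves by asking
  // FindElementAndOffset for the element containing offset zero.
  const StructLayout *SL = TD.getStructLayout(ST);
  assert(SL->getElementContainingOffset(0) == 1);

  // A zero-sized element has alloc size 0, which is why
  // RewriteMemIntrinUserOfAlloca now skips such elements entirely.
  assert(TD.getTypeAllocSize(ZeroSized) == 0);
  return 0;
}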
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
index 6ba983c..dea7b02 100644
--- a/lib/Transforms/Utils/LLVMBuild.txt
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = TransformUtils
parent = Transforms
-required_libraries = Analysis Core Support Target IPA
+required_libraries = Analysis Core IPA Support Target
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fa8061c..e60a41b 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
      User->eraseFromParent();
  }
}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+                                   const SmallVectorImpl<Instruction*> &Insts)
+    const {
+  return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+}
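The isInstInList added above gives LoadAndStorePromoter a default membership test: an instruction counts as "in the list" exactly when it appears in the vector handed to run(). A subclass could still override it, for example with a set when the instruction list is large; a rough sketch of that idea (the subclass, its name, and the constructor forwarding are assumptions for illustration, not part of this patch):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Instruction.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

class SetBasedPromoter : public LoadAndStorePromoter {
  SmallPtrSet<Instruction*, 16> InstSet;   // everything that will be passed to run()
public:
  SetBasedPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
      : LoadAndStorePromoter(Insts, S) {
    InstSet.insert(Insts.begin(), Insts.end());
  }
  // Same contract as the default above, but answered from a set instead of a
  // std::find over the vector.
  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &) const {
    return InstSet.count(I);
  }
};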
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a84a046..cd94da1 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -71,12 +71,14 @@ bool Constant::isAllOnesValue() const {
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
    return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();

-  // Check for constant vectors
+  // Check for constant vectors which are splats of -1 values.
  if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
-    return CV->isAllOnesValue();
+    if (Constant *Splat = CV->getSplatValue())
+      return Splat->isAllOnesValue();

  return false;
}

+
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(Type *Ty) {
  switch (Ty->getTypeID()) {
@@ -1071,26 +1073,6 @@ void ConstantVector::destroyConstant() {
  destroyConstantImpl();
}

-/// This function will return true iff every element in this vector constant
-/// is set to all ones.
-/// @returns true iff this constant's elements are all set to all ones.
-/// @brief Determine if the value is all ones.
-bool ConstantVector::isAllOnesValue() const {
-  // Check out first element.
-  const Constant *Elt = getOperand(0);
-  const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
-  const ConstantFP *CF = dyn_cast<ConstantFP>(Elt);
-
-  // Then make sure all remaining elements point to the same value.
-  for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
-    if (getOperand(I) != Elt)
-      return false;
-
-  // First value is all-ones.
-  return (CI && CI->isAllOnesValue()) ||
-         (CF && CF->isAllOnesValue());
-}
-
/// getSplatValue - If this is a splat constant, where all of the
/// elements have the same value, return that value.  Otherwise return null.
Constant *ConstantVector::getSplatValue() const {