Diffstat (limited to 'lib/CodeGen')
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 43
 lib/CodeGen/AllocationOrder.cpp | 2
 lib/CodeGen/Analysis.cpp | 100
 lib/CodeGen/AsmPrinter/ARMException.cpp | 4
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 34
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 33
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 97
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 2
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 43
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 22
 lib/CodeGen/AsmPrinter/DwarfException.h | 11
 lib/CodeGen/BranchFolding.cpp | 202
 lib/CodeGen/CMakeLists.txt | 6
 lib/CodeGen/CalcSpillWeights.cpp | 53
 lib/CodeGen/CallingConvLower.cpp | 8
 lib/CodeGen/CodeGen.cpp | 4
 lib/CodeGen/CodePlacementOpt.cpp | 2
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 81
 lib/CodeGen/CriticalAntiDepBreaker.h | 2
 lib/CodeGen/DFAPacketizer.cpp | 91
 lib/CodeGen/DeadMachineInstructionElim.cpp | 11
 lib/CodeGen/DwarfEHPrepare.cpp | 12
 lib/CodeGen/EarlyIfConversion.cpp | 618
 lib/CodeGen/ExecutionDepsFix.cpp | 5
 lib/CodeGen/IfConversion.cpp | 45
 lib/CodeGen/InlineSpiller.cpp | 20
 lib/CodeGen/InterferenceCache.cpp | 93
 lib/CodeGen/InterferenceCache.h | 34
 lib/CodeGen/IntrinsicLowering.cpp | 6
 lib/CodeGen/LLVMTargetMachine.cpp | 70
 lib/CodeGen/LexicalScopes.cpp | 2
 lib/CodeGen/LiveDebugVariables.cpp | 39
 lib/CodeGen/LiveInterval.cpp | 270
 lib/CodeGen/LiveIntervalAnalysis.cpp | 651
 lib/CodeGen/LiveIntervalUnion.cpp | 24
 lib/CodeGen/LiveIntervalUnion.h | 26
 lib/CodeGen/LiveRangeCalc.cpp | 120
 lib/CodeGen/LiveRangeCalc.h | 63
 lib/CodeGen/LiveRangeEdit.cpp | 88
 lib/CodeGen/LiveRegMatrix.cpp | 152
 lib/CodeGen/LiveRegMatrix.h | 148
 lib/CodeGen/LiveVariables.cpp | 69
 lib/CodeGen/LocalStackSlotAllocation.cpp | 3
 lib/CodeGen/MachineBasicBlock.cpp | 11
 lib/CodeGen/MachineBlockPlacement.cpp | 31
 lib/CodeGen/MachineCSE.cpp | 24
 lib/CodeGen/MachineCopyPropagation.cpp | 50
 lib/CodeGen/MachineFunction.cpp | 41
 lib/CodeGen/MachineFunctionPrinterPass.cpp | 9
 lib/CodeGen/MachineInstr.cpp | 130
 lib/CodeGen/MachineInstrBundle.cpp | 4
 lib/CodeGen/MachineLICM.cpp | 18
 lib/CodeGen/MachineLoopInfo.cpp | 16
 lib/CodeGen/MachineRegisterInfo.cpp | 21
 lib/CodeGen/MachineSSAUpdater.cpp | 45
 lib/CodeGen/MachineScheduler.cpp | 926
 lib/CodeGen/MachineVerifier.cpp | 167
 lib/CodeGen/PHIElimination.cpp | 184
 lib/CodeGen/Passes.cpp | 272
 lib/CodeGen/PeepholeOptimizer.cpp | 98
 lib/CodeGen/PostRASchedulerList.cpp | 39
 lib/CodeGen/ProcessImplicitDefs.cpp | 374
 lib/CodeGen/PrologEpilogInserter.cpp | 2
 lib/CodeGen/RegAllocBase.cpp | 161
 lib/CodeGen/RegAllocBase.h | 85
 lib/CodeGen/RegAllocBasic.cpp | 171
 lib/CodeGen/RegAllocFast.cpp | 43
 lib/CodeGen/RegAllocGreedy.cpp | 238
 lib/CodeGen/RegAllocPBQP.cpp | 187
 lib/CodeGen/RegisterClassInfo.cpp | 7
 lib/CodeGen/RegisterClassInfo.h | 132
 lib/CodeGen/RegisterCoalescer.cpp | 1211
 lib/CodeGen/RegisterCoalescer.h | 29
 lib/CodeGen/RegisterPressure.cpp | 841
 lib/CodeGen/RegisterScavenging.cpp | 25
 lib/CodeGen/RenderMachineFunction.cpp | 1013
 lib/CodeGen/RenderMachineFunction.h | 338
 lib/CodeGen/ScheduleDAG.cpp | 23
 lib/CodeGen/ScheduleDAGInstrs.cpp | 406
 lib/CodeGen/ScoreboardHazardRecognizer.cpp | 26
 lib/CodeGen/SelectionDAG/CMakeLists.txt | 2
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 278
 lib/CodeGen/SelectionDAG/FastISel.cpp | 37
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 2
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 54
 lib/CodeGen/SelectionDAG/InstrEmitter.h | 6
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1000
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 33
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 20
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 57
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 79
 lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 6
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 15
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 42
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 84
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 153
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 332
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 13
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 77
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 159
 lib/CodeGen/ShadowStackGC.cpp | 8
 lib/CodeGen/SjLjEHPrepare.cpp | 20
 lib/CodeGen/SlotIndexes.cpp | 3
 lib/CodeGen/SpillPlacement.cpp | 11
 lib/CodeGen/SplitKit.cpp | 18
 lib/CodeGen/StackSlotColoring.cpp | 3
 lib/CodeGen/TailDuplication.cpp | 34
 lib/CodeGen/TargetInstrInfoImpl.cpp | 208
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 62
 lib/CodeGen/TwoAddressInstructionPass.cpp | 251
 lib/CodeGen/VirtRegMap.cpp | 179
 lib/CodeGen/VirtRegMap.h | 7
 117 files changed, 7929 insertions(+), 6159 deletions(-)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 822a564..205480a 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,10 +16,10 @@
#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -157,8 +157,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- for (const uint16_t *Alias = TRI->getOverlaps(*I);
- unsigned Reg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
@@ -173,8 +173,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- for (const uint16_t *Alias = TRI->getOverlaps(*I);
- unsigned Reg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
@@ -189,8 +189,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- for (const uint16_t *Alias = TRI->getOverlaps(Reg);
- unsigned AliasReg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
@@ -265,10 +265,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- PassthruRegs.insert(*Subreg);
- }
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PassthruRegs.insert(*SubRegs);
}
}
}
@@ -333,9 +331,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
if (!State->IsLive(SubregReg)) {
KillIndices[SubregReg] = KillIdx;
DefIndices[SubregReg] = ~0u;
@@ -392,8 +389,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Any aliased that are live at this point are completely or
// partially defined here, so group those aliases with Reg.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
@@ -404,7 +401,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -423,9 +420,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- for (const uint16_t *Alias = TRI->getOverlaps(Reg);
- unsigned AliasReg = *Alias; ++Alias)
- DefIndices[AliasReg] = Count;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ DefIndices[*AI] = Count;
}
}
@@ -479,7 +475,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -678,9 +674,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
} else {
bool found = false;
- for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
- *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (State->IsLive(AliasReg) ||
(KillIndices[Reg] > DefIndices[AliasReg])) {
DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 87f6431..32ad34a 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
#include "AllocationOrder.h"
-#include "RegisterClassInfo.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 00874d4..447f398 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -203,6 +203,63 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+
+/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
+/// through it (and any transitive noop operands to it) and return its input
+/// value. This is used to determine if a tail call can be formed.
+///
+static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
+ // If V is not an instruction, it can't be looked through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
+
+ Value *Op = I->getOperand(0);
+
+ // Look through truly no-op truncates.
+ if (isa<TruncInst>(I) &&
+ TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
+ return getNoopInput(I->getOperand(0), TLI);
+
+ // Look through truly no-op bitcasts.
+ if (isa<BitCastInst>(I)) {
+ // No type change at all.
+ if (Op->getType() == I->getType())
+ return getNoopInput(Op, TLI);
+
+ // Pointer to pointer cast.
+ if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
+ return getNoopInput(Op, TLI);
+
+ if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
+ TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
+ TLI.isTypeLegal(EVT::getEVT(I->getType())))
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through inttoptr.
+ if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through ptrtoint.
+ if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+
+ // Otherwise it's not something we can look through.
+ return V;
+}
+
+
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
@@ -226,7 +283,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// been fully understood.
if (!Ret &&
(!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
- !isa<UnreachableInst>(Term))) return false;
+ !isa<UnreachableInst>(Term)))
+ return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
@@ -264,28 +322,28 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
- for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
- U = dyn_cast<Instruction>(U->getOperand(0))) {
- if (!U)
- return false;
- if (!U->hasOneUse())
+ // We handle two cases: multiple return values + scalars.
+ Value *RetVal = Ret->getOperand(0);
+ if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
+ // Handle scalars first.
+ return getNoopInput(Ret->getOperand(0), TLI) == I;
+
+ // If this is an aggregate return, look through the insert/extract values and
+ // see if each is transparent.
+ for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(RetVal, i);
+ if (InScalar == 0) return false;
+ InScalar = getNoopInput(InScalar, TLI);
+
+ // If the scalar value being inserted is an extractvalue of the right index
+ // from the call, then everything is good.
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
+ if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
+ EVI->getIndices()[0] != i)
return false;
- if (U == I)
- break;
- // Check for a truly no-op truncate.
- if (isa<TruncInst>(U) &&
- TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
- continue;
- // Check for a truly no-op bitcast.
- if (isa<BitCastInst>(U) &&
- (U->getOperand(0)->getType() == U->getType() ||
- (U->getOperand(0)->getType()->isPointerTy() &&
- U->getType()->isPointerTy())))
- continue;
- // Otherwise it's not a true no-op.
- return false;
}
-
+
return true;
}
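The new getNoopInput is a single-use look-through: keep peeling wrapper instructions that provably pass their operand through unchanged (free truncates, same-type or pointer-to-pointer bitcasts, width-preserving inttoptr/ptrtoint) and stop at the first value that actually computes something. A standalone toy sketch of that recursion shape; Node is a stand-in for llvm::Value, not LLVM API:

    #include <cassert>

    // Toy stand-in for llvm::Value: a node is either an original source,
    // a value-preserving cast, or a real computation.
    struct Node {
      enum Kind { Source, NoopCast, RealOp } kind;
      const Node *operand; // single operand; null for Source
      int uses;            // number of users
    };

    // Peel single-use no-op wrappers, mirroring getNoopInput's structure.
    static const Node *lookThroughNoops(const Node *N) {
      while (N->kind == Node::NoopCast && N->uses == 1 && N->operand)
        N = N->operand; // transparent: the same value flows through
      return N;
    }

    int main() {
      Node Src = {Node::Source, nullptr, 1};
      Node Cast = {Node::NoopCast, &Src, 1};
      assert(lookThroughNoops(&Cast) == &Src);
      return 0;
    }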
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b60fda8..bf5d8c4 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
ARMException::ARMException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
- {}
+ : DwarfException(A) {}
ARMException::~ARMException() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f6cde98..b7fc663 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -16,6 +16,7 @@
#if !defined(ANDROID_TARGET_BUILD) || defined(ANDROID_ENGINEERING_BUILD)
# include "DwarfDebug.h"
# include "DwarfException.h"
+# include "llvm/DebugInfo.h"
#endif // !ANDROID_TARGET_BUILD || ANDROID_ENGINEERING_BUILD
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
@@ -26,7 +27,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -484,10 +484,8 @@ void AsmPrinter::EmitFunctionHeader() {
void AsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (CurrentFnSym->isUndefined()) {
- OutStreamer.ForceCodeRegion();
+ if (CurrentFnSym->isUndefined())
return OutStreamer.EmitLabel(CurrentFnSym);
- }
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
@@ -624,7 +622,7 @@ bool AsmPrinter::needsSEHMoves() {
}
bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
- return MAI->doesDwarfUseRelocationsForStringPool();
+ return MAI->doesDwarfUseRelocationsAcrossSections();
}
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
@@ -813,8 +811,8 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
- for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
- *SR && Reg < 0; ++SR) {
+ for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
+ ++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register uses of the superregister
// so that we can produce a DW_OP_bit_piece
@@ -1102,15 +1100,6 @@ void AsmPrinter::EmitJumpTableInfo() {
EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
- // If we know the form of the jump table, go ahead and tag it as such.
- if (!JTInDiffSection) {
- if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
- OutStreamer.EmitJumpTable32Region();
- } else {
- OutStreamer.EmitDataRegion();
- }
- }
-
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1416,13 +1405,14 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size)
const {
- // Emit Label+Offset
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
- MCConstantExpr::Create(Offset, OutContext),
- OutContext);
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
- OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
}
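Besides folding away the degenerate add when Offset is zero, the new EmitLabelPlusOffset body fixes a latent bug: the old code always emitted 4 bytes, ignoring the Size parameter. A sketch of the expression construction, using only the MC calls visible in the hunk:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    #include <cstdint>

    // Build "Label + Offset", folding away the add when Offset is zero.
    static const llvm::MCExpr *
    makeLabelPlusOffset(const llvm::MCSymbol *Label, uint64_t Offset,
                        llvm::MCContext &Ctx) {
      const llvm::MCExpr *E = llvm::MCSymbolRefExpr::Create(Label, Ctx);
      if (Offset)
        E = llvm::MCBinaryExpr::CreateAdd(
            E, llvm::MCConstantExpr::Create(Offset, Ctx), Ctx);
      return E;
    }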
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index e9e9335..711375b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -329,11 +329,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
+ MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
@@ -413,9 +413,28 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
/// instruction, using the specified assembler variant. Targets should
/// override this to format as appropriate.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
- // Target doesn't support this yet!
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'n': // Negate the immediate constant.
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << -MO.getImm();
+ return false;
+ }
+ }
return true;
}
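The new default PrintAsmOperand gives every target the 'c' and 'n' operand modifiers for immediates: 'c' prints the constant without the target's immediate syntax, 'n' prints its negation. A hedged usage example, assuming GCC-style inline asm on an x86 target where 'c' strips the '$' prefix so the template can re-add it by hand:

    // "%c1" expands to the bare constant 42; the literal '$' in the
    // template restores x86 AT&T immediate syntax, giving "movl $42, ...".
    static int fortyTwo() {
      int R;
      asm("movl $%c1, %0" : "=r"(R) : "i"(42));
      return R;
    }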
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index cc5b642..d231665 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -17,9 +17,9 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
@@ -33,7 +33,7 @@ using namespace llvm;
/// CompileUnit - Compile unit constructor.
CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
- DwarfDebug *DW)
+ DwarfDebug *DW)
: ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -198,7 +198,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
return;
DIFile File = Ty.getFile();
unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
- File.getDirectory());
+ File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -308,7 +308,8 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
} else if (Element == DIBuilder::OpDeref) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ if (!Location.isReg())
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIBuilder Opcode");
}
@@ -418,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- if (Location.isReg()) {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
- } else {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
-
- addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
- }
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -646,8 +632,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty,
- unsigned Attribute) {
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
if (!Ty.Verify())
return;
@@ -776,6 +761,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.addChild(ElemDie);
}
}
+ DIType DTy = CTy.getTypeDerivedFrom();
+ if (DTy.Verify()) {
+ addType(&Buffer, DTy);
+ addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
+ }
}
break;
case dwarf::DW_TAG_subroutine_type: {
@@ -801,9 +791,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add prototype flag if we're dealing with a C language and the
// function has been prototyped.
if (isPrototyped &&
- (Language == dwarf::DW_LANG_C89 ||
- Language == dwarf::DW_LANG_C99 ||
- Language == dwarf::DW_LANG_ObjC))
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
@@ -846,19 +836,19 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType()) {
- DIDerivedType DDTy(Element);
- if (DDTy.getTag() == dwarf::DW_TAG_friend) {
- ElemDie = new DIE(dwarf::DW_TAG_friend);
- addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
- } else
- ElemDie = createMemberDIE(DIDerivedType(Element));
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element));
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
ElemDie = new DIE(Property.getTag());
StringRef PropertyName = Property.getObjCPropertyName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- addType(ElemDie, Property.getType());
- addSourceLine(ElemDie, Property);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
StringRef GetterName = Property.getObjCPropertyGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
@@ -925,19 +915,21 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
- || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- {
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
+ // TODO: Do we care about size for enum forward declarations?
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
- else {
+ else if (!CTy.isForwardDecl())
// Add zero size if it is not a forward declaration.
- if (CTy.isForwardDecl())
- addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- else
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
- }
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add source line info if available.
if (!CTy.isForwardDecl())
@@ -968,7 +960,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateValueParameter.
DIE *
-CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
DIE *ParamDIE = getDIE(TPV);
if (ParamDIE)
return ParamDIE;
@@ -1015,17 +1007,17 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPDie)
return SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
DISubprogram SPDecl = SP.getFunctionDeclaration();
DIE *DeclDie = NULL;
if (SPDecl.isSubprogram()) {
DeclDie = getOrCreateSubprogramDIE(SPDecl);
}
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
-
- // DW_TAG_inlined_subroutine may refer to this DIE.
- insertDIE(SP, SPDie);
-
// Add to context owner.
addToContextOwner(SPDie, SP.getContext());
@@ -1240,7 +1232,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+ DIE *IndexTy) {
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
uint64_t L = SR.getLo();
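Two changes above share one theme. First, addComplexAddress now drops DW_OP_deref when the location is a plain register, matching DWARF semantics: a DW_OP_reg* location names the register itself rather than an address, so there is nothing to dereference. Second, the open-coded register encoding in addBlockByrefAddress moves into the addRegisterOp/addRegisterOffset helpers. A standalone sketch of the rule those helpers centralize (opcode values from the DWARF spec; the byte vector is a stand-in for DIEBlock):

    #include <cstdint>
    #include <vector>

    enum : uint8_t {
      DW_OP_reg0 = 0x50, // compact one-byte forms for registers 0-31
      DW_OP_regx = 0x90  // extended form: opcode + ULEB128 register number
    };

    static void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t B = V & 0x7f;
        V >>= 7;
        Out.push_back(B | (V ? 0x80 : 0));
      } while (V);
    }

    // Mirror of the rule the removed lines open-coded: small register
    // numbers get DW_OP_reg0+N, larger ones DW_OP_regx with a ULEB operand.
    static void addRegisterOp(std::vector<uint8_t> &Block, unsigned Reg) {
      if (Reg < 32)
        Block.push_back(DW_OP_reg0 + Reg);
      else {
        Block.push_back(DW_OP_regx);
        appendULEB128(Block, Reg);
      }
    }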
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 45e407e..b4ff9e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,7 +15,7 @@
#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DIE.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/OwningPtr.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index cb78878..649684a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -17,9 +17,10 @@
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DIBuilder.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -32,11 +33,10 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -117,7 +117,6 @@ DIType DbgVariable::getType() const {
if (getName() == DT.getName())
return (DT.getTypeDerivedFrom());
}
- return Ty;
}
return Ty;
}
@@ -127,6 +126,7 @@ DIType DbgVariable::getType() const {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
+ SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator),
PrevLabel(NULL) {
NextStringPoolNumber = 0;
@@ -566,7 +566,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
- if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
Asm->GetTempSymbol("section_line"));
else
@@ -1310,8 +1310,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
- unsigned Reg = *AI; ++AI) {
+ for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+ AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
continue;
@@ -1381,7 +1382,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- 0);
+ DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
}
}
@@ -1421,6 +1422,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIVariable DV(Variables.getElement(i));
if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
continue;
+ // Check that DbgVariable for DV wasn't created earlier, when
+ // findAbstractVariable() was called for inlined instance of DV.
+ LLVMContext &Ctx = DV->getContext();
+ DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+ if (AbstractVariables.lookup(CleanDV))
+ continue;
if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
addScopeVariable(Scope, new DbgVariable(DV, NULL));
}
@@ -1623,7 +1630,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
V->getValue(),
4);
@@ -1636,10 +1643,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
break;
}
case dwarf::DW_AT_location: {
- if (DIELabel *L = dyn_cast<DIELabel>(Values[i]))
- Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
- else
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ Asm->EmitLabelReference(L->getValue(), 4);
+ else
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else {
Values[i]->EmitValue(Asm, Form);
+ }
break;
}
case dwarf::DW_AT_accessibility: {
@@ -2049,9 +2060,11 @@ void DwarfDebug::emitDebugLoc() {
if (Element == DIBuilder::OpPlus) {
Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
Asm->EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref)
- Asm->EmitInt8(dwarf::DW_OP_deref);
- else llvm_unreachable("unknown Opcode found in complex address");
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Entry.Loc.isReg())
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ } else
+ llvm_unreachable("unknown Opcode found in complex address");
}
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 83f30f5..d1d6512 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,11 +14,11 @@
#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#include "DIE.h"
+#include "llvm/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -188,6 +188,9 @@ class DwarfDebug {
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
+ /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
//===--------------------------------------------------------------------===//
// Attributes used to construct specific Dwarf sections.
//
@@ -210,11 +213,11 @@ class DwarfDebug {
/// SourceIdMap - Source id map, i.e. pair of source filename and directory,
/// separated by a zero byte, mapped to a unique id.
- StringMap<unsigned> SourceIdMap;
+ StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
/// references.
- StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+ StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
unsigned NextStringPoolNumber;
/// SectionMap - Provides a unique id per text section.
@@ -232,7 +235,7 @@ class DwarfDebug {
/// ScopeVariables - Collection of dbg variables of a scope.
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection on abstract variables.
+ /// AbstractVariables - Collection of abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
@@ -292,9 +295,6 @@ class DwarfDebug {
std::vector<FunctionDebugFrameInfo> DebugFrames;
- // DIEValueAllocator - All DIEValues are allocated through this allocator.
- BumpPtrAllocator DIEValueAllocator;
-
// Section Symbols: these are assembler temporary labels that are emitted at
// the beginning of each supported dwarf section. These are used to form
// section offsets and are created by EmitSectionLabels.
@@ -333,9 +333,6 @@ private:
/// of the function.
DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S);
-
/// constructScopeDIE - Construct a DIE for this scope.
DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
@@ -517,9 +514,6 @@ public:
/// in the SourceIds map.
unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
- /// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(DISubprogram SP);
-
/// getStringPool - returns the entry into the start of the pool.
MCSymbol *getStringPool();
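The StringMap changes above, together with the constructor change in DwarfDebug.cpp, switch both maps to allocate their entries from the shared DIEValueAllocator. That is also why the allocator member moves up in the class: members are destroyed in reverse declaration order, so the allocator must be declared before the maps that reference it. A minimal sketch of the pattern, with simplified value types:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/Allocator.h"

    struct Tables {
      // Declared first: constructed before and destroyed after the maps.
      llvm::BumpPtrAllocator Alloc;
      // Reference-to-allocator instantiations share Alloc's memory.
      llvm::StringMap<unsigned, llvm::BumpPtrAllocator &> SourceIds;
      llvm::StringMap<unsigned, llvm::BumpPtrAllocator &> Strings;
      Tables() : SourceIds(Alloc), Strings(Alloc) {}
    };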
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b5f86ab..75f6056 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -175,17 +175,6 @@ public:
};
class ARMException : public DwarfException {
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index ef1d2ba..fb65bb7 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -137,9 +137,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- ImpDefRegs.insert(SubReg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
++I;
}
if (ImpDefRegs.empty())
@@ -188,7 +187,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Use a RegScavenger to help update liveness when required.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+ if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
RS = new RegScavenger();
else
MRI.invalidateLiveness();
@@ -819,10 +818,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
}
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
-
- if (!EnableTailMerge) return false;
-
bool MadeChange = false;
+ if (!EnableTailMerge) return MadeChange;
// First find blocks with no successors.
MergePotentials.clear();
@@ -839,6 +836,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() == TailMergeThreshold)
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
TriedMerging.insert(MergePotentials[i].getBlock());
+
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
@@ -864,88 +862,97 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
- if (I->pred_size() >= 2) {
- SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
- MachineBasicBlock *IBB = I;
- MachineBasicBlock *PredBB = prior(I);
- MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
- E2 = I->pred_end();
- P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
- MachineBasicBlock *PBB = *P;
- if (TriedMerging.count(PBB))
- continue;
- // Skip blocks that loop to themselves, can't tail merge these.
- if (PBB == IBB)
- continue;
- // Visit each predecessor only once.
- if (!UniquePreds.insert(PBB))
- continue;
- // Skip blocks which may jump to a landing pad. Can't tail merge these.
- if (PBB->getLandingPadSuccessor())
- continue;
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
- // Failing case: IBB is the target of a cbr, and
- // we cannot reverse the branch.
- SmallVector<MachineOperand, 4> NewCond(Cond);
- if (!Cond.empty() && TBB == IBB) {
- if (TII->ReverseBranchCondition(NewCond))
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+
+ // Skip blocks that loop to themselves, can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice to have
+ // a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
continue;
- // This is the QBB case described above
- if (!FBB)
- FBB = llvm::next(MachineFunction::iterator(PBB));
- }
- // Failing case: the only way IBB can be reached from PBB is via
- // exception handling. Happens for landing pads. Would be nice
- // to have a bit in the edge so we didn't have to do all this.
- if (IBB->isLandingPad()) {
- MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock *PredNextBB = NULL;
- if (IP != MF.end())
- PredNextBB = IP;
- if (TBB == NULL) {
- if (IBB != PredNextBB) // fallthrough
- continue;
- } else if (FBB) {
- if (TBB != IBB && FBB != IBB) // cbr then ubr
- continue;
- } else if (Cond.empty()) {
- if (TBB != IBB) // ubr
- continue;
- } else {
- if (TBB != IBB && IBB != PredNextBB) // cbr
- continue;
- }
- }
- // Remove the unconditional branch at the end, if any.
- if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PBB);
- if (!Cond.empty())
- // reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
- if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(IBB, PredBB);
- // Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in
- // TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
- if (MergePotentials.size() == 1 &&
- MergePotentials.begin()->getBlock() != PredBB)
- FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
+
return MadeChange;
}
@@ -1459,7 +1466,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
}
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
-/// in successors to. The location is ususally just before the terminator,
+/// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous
/// instruction is the flag setting instruction, the previous instruction is
/// the preferred location. This function also gathers uses and defs of the
@@ -1483,9 +1490,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg)
continue;
if (MO.isUse()) {
- Uses.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
} else if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
// register that is later used.
@@ -1545,18 +1551,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg)
continue;
if (MO.isUse()) {
- Uses.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
} else {
if (Uses.count(Reg)) {
Uses.erase(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Uses.erase(*SR); // Use getSubRegisters to be conservative
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
- Defs.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Defs.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
}
}
@@ -1683,8 +1687,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
- LocalDefsSet.erase(*OR);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
}
// Track local defs so we can update liveins.
@@ -1696,8 +1700,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
- LocalDefsSet.insert(*OR);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.insert(*AI);
}
HasDups = true;
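The bulk of the TailMergeBlocks hunk is a pure control-flow refactor rather than a behavior change: the "if (I->pred_size() >= 2) { ... }" wrapper becomes an early "if (I->pred_size() < 2) continue;", un-indenting the whole loop body by one level. A toy sketch of the guard-clause shape:

    #include <vector>

    // Same loop written with a guard clause: invert the condition and
    // continue early instead of nesting the body inside an if.
    static int sumAtLeastTwo(const std::vector<int> &V) {
      int Sum = 0;
      for (int X : V) {
        if (X < 2) continue; // was: if (X >= 2) { Sum += X; }
        Sum += X;
      }
      return Sum;
    }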
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 21729cd..d240389 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen
DeadMachineInstructionElim.cpp
DFAPacketizer.cpp
DwarfEHPrepare.cpp
+ EarlyIfConversion.cpp
EdgeBundles.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
@@ -30,6 +31,7 @@ add_llvm_library(LLVMCodeGen
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveIntervalUnion.cpp
+ LiveRegMatrix.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
LiveRangeCalc.cpp
@@ -77,8 +79,8 @@ add_llvm_library(LLVMCodeGen
RegAllocPBQP.cpp
RegisterClassInfo.cpp
RegisterCoalescer.cpp
+ RegisterPressure.cpp
RegisterScavenging.cpp
- RenderMachineFunction.cpp
ScheduleDAG.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
@@ -103,5 +105,7 @@ add_llvm_library(LLVMCodeGen
VirtRegMap.cpp
)
+add_dependencies(LLVMCodeGen intrinsics_gen)
+
add_subdirectory(SelectionDAG)
add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index ea16a25..939af3f 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -39,18 +39,20 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
MachineFunctionPass::getAnalysisUsage(au);
}
-bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
<< "********** Function: "
- << fn.getFunction()->getName() << '\n');
-
- LiveIntervals &lis = getAnalysis<LiveIntervals>();
- VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
- for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
- LiveInterval &li = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(li.reg))
- vrai.CalculateWeightAndHint(li);
+ << MF.getFunction()->getName() << '\n');
+
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
}
return false;
}
@@ -86,6 +88,27 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return tri.getMatchingSuperReg(hreg, sub, rc);
}
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+ return false;
+ }
+ return true;
+}
+
void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
@@ -171,17 +194,11 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
}
// If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If none of the defs are
- // loads, then it's potentially very cheap to re-materialize.
+ // it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- bool isLoad = false;
- if (LIS.isReMaterializable(li, 0, isLoad)) {
- if (isLoad)
- totalWeight *= 0.9F;
- else
- totalWeight *= 0.5F;
- }
+ if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ totalWeight *= 0.5F;
li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
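The rewritten runOnMachineFunction shows the iteration idiom that replaces walking the LiveIntervals map directly: enumerate virtual register indices, convert each with index2VirtReg, and skip registers with only debug (or no) uses before querying the interval. A sketch using only the calls from the hunk, wrapped in a hypothetical helper:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"

    // Hypothetical helper: visit every virtual register that has at least
    // one non-debug use. Visitor is any callable taking the register number.
    template <typename Visitor>
    static void forEachLiveVirtReg(llvm::MachineRegisterInfo &MRI,
                                   Visitor Visit) {
      for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
        unsigned Reg = llvm::TargetRegisterInfo::index2VirtReg(i);
        if (MRI.reg_nodbg_empty(Reg))
          continue; // no non-debug uses: nothing to compute
        Visit(Reg);
      }
    }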
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2b7dfdb..0b747fd 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -49,8 +49,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
Size = MinSize;
if (MinAlign > (int)Align)
Align = MinAlign;
- if (MF.getFrameInfo()->getMaxAlignment() < Align)
- MF.getFrameInfo()->setMaxAlignment(Align);
+ MF.getFrameInfo()->ensureMaxAlignment(Align);
TM.getTargetLowering()->HandleByVal(this, Size);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
@@ -58,9 +57,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- for (const uint16_t *Alias = TRI.getOverlaps(Reg);
- unsigned Reg = *Alias; ++Alias)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
}
/// AnalyzeFormalArguments - Analyze an array of argument values,
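The tightened MarkAllocated still relies on the word/bit split used by UsedRegs: register N lives in 32-bit word N/32 at bit N mod 32. A standalone sketch of that indexing:

    #include <cstdint>
    #include <vector>

    // Standalone sketch of the UsedRegs indexing: one bit per register,
    // packed 32 to a word.
    struct RegBitSet {
      std::vector<uint32_t> Words;
      explicit RegBitSet(unsigned NumRegs) : Words((NumRegs + 31) / 32) {}
      void set(unsigned Reg) {
        Words[Reg / 32] |= uint32_t(1) << (Reg & 31);
      }
      bool test(unsigned Reg) const {
        return (Words[Reg / 32] >> (Reg & 31)) & 1;
      }
    };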
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a81bb5c..fb2c2e8 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeCalculateSpillWeightsPass(Registry);
initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
@@ -53,7 +54,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
initializeRegisterCoalescerPass(Registry);
- initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
@@ -65,7 +65,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableBlockElimPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
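The initializeCodeGen hunk keeps the registry in sync with the pass roster: each newly added pass (EarlyIfConverter, VirtRegRewriter, MachineFunctionPrinterPass) gets an initialize call, and the removed RenderMachineFunction loses its entry. A sketch of the idiom, using only the initializer names from the hunk; normally these calls live inside initializeCodeGen itself:

    #include "llvm/InitializePasses.h"

    // Register the passes added by this change so the pass registry can
    // resolve them by name.
    static void registerNewCodeGenPasses(llvm::PassRegistry &Registry) {
      llvm::initializeEarlyIfConverterPass(Registry);
      llvm::initializeVirtRegRewriterPass(Registry);
      llvm::initializeMachineFunctionPrinterPassPass(Registry);
    }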
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index c13c05e..99233df 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -201,7 +201,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
// fallthrough edge.
if (!Prior->isSuccessor(End))
goto next_pred;
- // Otherwise we can stop scanning and procede to move the blocks.
+ // Otherwise we can stop scanning and proceed to move the blocks.
break;
}
// If we hit a switch or something complicated, don't move anything
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index bad5010..a9de1c7 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -62,17 +62,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -84,17 +78,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
@@ -104,18 +92,12 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
- unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -208,7 +190,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -218,11 +200,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
- unsigned AliasReg = *Alias;
+ unsigned AliasReg = *AI;
if (Classes[AliasReg]) {
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -236,9 +218,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
KeepRegs.set(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- KeepRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
}
}
}
@@ -247,7 +228,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Count) {
// Update liveness.
- // Proceding upwards, registers that are defed but not used in this
+ // Proceeding upwards, registers that are defed but not used in this
// instruction are now dead.
if (!TII->isPredicated(MI)) {
@@ -282,9 +263,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.reset(SubregReg);
@@ -292,11 +272,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
- *Super; ++Super) {
- unsigned SuperReg = *Super;
- Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- }
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -308,7 +285,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -328,8 +305,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
DefIndices[AliasReg] = ~0u;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 7746259..ad95c48 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,11 +17,11 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
#include <map>
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 5ff641c..ff2f113 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,10 +23,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;
@@ -100,22 +100,23 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
reserveResources(&MID);
}
-namespace {
+namespace llvm {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// the Schedule method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ MachineDominatorTree &MDT, bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule();
};
-} // end anonymous namespace
+}
DefaultVLIWScheduler::DefaultVLIWScheduler(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
bool IsPostRA) :
ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+ CanHandleTerminators = true;
}
void DefaultVLIWScheduler::schedule() {
@@ -129,49 +130,25 @@ VLIWPacketizerList::VLIWPacketizerList(
bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
TII = TM.getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
- SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
}
// VLIWPacketizerList Dtor
VLIWPacketizerList::~VLIWPacketizerList() {
- delete SchedulerImpl;
- delete ResourceTracker;
-}
-
-// ignorePseudoInstruction - ignore pseudo instructions.
-bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) {
- if (MI->isDebugValue())
- return true;
-
- if (TII->isSchedulingBoundary(MI, MBB, MF))
- return true;
-
- return false;
-}
-
-// isSoloInstruction - return true if instruction I must end previous
-// packet.
-bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) {
- if (I->isInlineAsm())
- return true;
-
- return false;
-}
+ if (VLIWScheduler)
+ delete VLIWScheduler;
-// addToPacket - Add I to the current packet and reserve resource.
-void VLIWPacketizerList::addToPacket(MachineInstr *MI) {
- CurrentPacketMIs.push_back(MI);
- ResourceTracker->reserveResources(MI);
+ if (ResourceTracker)
+ delete ResourceTracker;
}
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineInstr *I) {
+ MachineInstr *MI) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst, I);
+ finalizeBundle(*MBB, MIFirst, MI);
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
@@ -181,31 +158,35 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
- assert(MBB->end() == EndItr && "Bad EndIndex");
-
- SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size());
-
- // Build the DAG without reordering instructions.
- SchedulerImpl->schedule();
-
- // Remember scheduling units.
- SUnits = SchedulerImpl->SUnits;
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
MachineInstr *MI = BeginItr;
- // Ignore pseudo instructions.
- if (ignorePseudoInstruction(MI, MBB))
- continue;
+ this->initPacketizerState();
// End the current packet if needed.
- if (isSoloInstruction(MI)) {
+ if (this->isSoloInstruction(MI)) {
endPacket(MBB, MI);
continue;
}
- SUnit *SUI = SchedulerImpl->getSUnit(MI);
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
@@ -215,13 +196,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
MachineInstr *MJ = *VI;
- SUnit *SUJ = SchedulerImpl->getSUnit(MJ);
+ SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
// Is it legal to packetize SUI and SUJ together.
- if (!isLegalToPacketizeTogether(SUI, SUJ)) {
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
// Allow packetization if dependency can be pruned.
- if (!isLegalToPruneDependencies(SUI, SUJ)) {
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
endPacket(MBB, MI);
break;
@@ -234,11 +215,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
}
// Add MI to the current packet.
- addToPacket(MI);
+ BeginItr = this->addToPacket(MI);
} // For all instructions in BB.
// End any packet left behind.
endPacket(MBB, EndItr);
-
- SchedulerImpl->exitRegion();
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
}
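With the hooks now dispatched through 'this', a target supplies its packetizer by subclassing VLIWPacketizerList and overriding the legality tests that PacketizeMIs calls. A hedged sketch against the signatures visible in this patch; the class name and the deliberately conservative policies are placeholders, not a real target:

class MyVLIWPacketizer : public VLIWPacketizerList {
public:
  MyVLIWPacketizer(MachineFunction &MF, MachineLoopInfo &MLI,
                   MachineDominatorTree &MDT)
    : VLIWPacketizerList(MF, MLI, MDT, /*IsPostRA=*/true) {}

  // Debug values never occupy a packet slot.
  virtual bool ignorePseudoInstruction(MachineInstr *MI,
                                       MachineBasicBlock *MBB) {
    return MI->isDebugValue();
  }

  // Inline asm always ends the current packet.
  virtual bool isSoloInstruction(MachineInstr *MI) {
    return MI->isInlineAsm();
  }

  // Conservative policy: never packetize an instruction with one it
  // depends on, and never prune a dependence.
  virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
    return !SUJ->isSucc(SUI);
  }
  virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
    return false;
  }
};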
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index aa10d1d..b4394e8 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -171,9 +171,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs)
- LivePhysRegs.reset(*SubRegs);
+ for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ LivePhysRegs.reset(*SR);
}
} else if (MO.isRegMask()) {
// Register mask of preserved registers. All clobbers are dead.
@@ -187,10 +186,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (MO.isReg() && MO.isUse()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- LivePhysRegs.set(Reg);
- for (const uint16_t *AliasSet = TRI->getAliasSet(Reg);
- *AliasSet; ++AliasSet)
- LivePhysRegs.set(*AliasSet);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LivePhysRegs.set(*AI);
}
}
}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 944dd4f..7095624 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -39,7 +39,7 @@ namespace {
Constant *RewindFunction;
bool InsertUnwindResumeCalls(Function &Fn);
- Instruction *GetExceptionObject(ResumeInst *RI);
+ Value *GetExceptionObject(ResumeInst *RI);
public:
static char ID; // Pass identification, replacement for typeid.
@@ -68,9 +68,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
/// GetExceptionObject - Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
/// instructions, including the 'resume' instruction.
-Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
Value *V = RI->getOperand(0);
- Instruction *ExnObj = 0;
+ Value *ExnObj = 0;
InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
LoadInst *SelLoad = 0;
InsertValueInst *ExcIVI = 0;
@@ -81,7 +81,7 @@ Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
- ExnObj = cast<Instruction>(ExcIVI->getOperand(1));
+ ExnObj = ExcIVI->getOperand(1);
SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
EraseIVIs = true;
}
@@ -139,7 +139,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// _Unwind_Resume to the end of the single resume block.
ResumeInst *RI = Resumes.front();
BasicBlock *UnwindBB = RI->getParent();
- Instruction *ExnObj = GetExceptionObject(RI);
+ Value *ExnObj = GetExceptionObject(RI);
// Call the _Unwind_Resume function.
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
@@ -162,7 +162,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
- Instruction *ExnObj = GetExceptionObject(RI);
+ Value *ExnObj = GetExceptionObject(RI);
PN->addIncoming(ExnObj, Parent);
++NumResumesLowered;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 0000000..9840a40
--- /dev/null
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,618 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector;
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+// Triangle: Head Diamond: Head
+// | \ / \_
+// | \ / |
+// | [TF]BB FBB TBB
+// | / \ /
+// | / \ /
+// Tail Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+  /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ if (MO->isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+
+ // Remember clobbered regunits.
+ if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI))
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Where no clobbered regunits are live.
+///
+/// This function sets InsertionPoint and returns true when successful; it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+    // Some of the conditional code depends on I.
+ if (InsertAfter.count(I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO->isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO->readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+    // Some of the clobbered registers are live before I, so this is not a
+    // valid insertion point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = 0;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ // We could support additional Tail predecessors by updating phis instead of
+ // eliminating them. Let's see an example where it matters first.
+ Tail = Succ0->succ_begin()[0];
+ if (Tail->pred_size() != 2)
+ return false;
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = TBB == Tail ? Head : TBB;
+ MachineBasicBlock *FPred = FBB == Tail ? Head : FBB;
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ return true;
+}
+
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = 0;
+ }
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+  // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB)) {
+ // If-convert MBB and update analyses.
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: "
+ << ((Value*)MF.getFunction())->getName() << '\n');
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (po_iterator<MachineDominatorTree*>
+ I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
+ if (tryConvertIf(I->getBlock()))
+ Changed = true;
+
+ MF.verify(this, "After early if-conversion");
+ return Changed;
+}
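Whether the tail phis can become selects is the target's decision, made through the canInsertSelect/insertSelect hooks queried in canConvertIf. A hedged sketch of a target override, following the argument order of the call site above; MyTargetInstrInfo and the uniform one-cycle latencies are invented for illustration:

bool MyTargetInstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
    unsigned TrueReg, unsigned FalseReg,
    int &CondCycles, int &TrueCycles, int &FalseCycles) const {
  // Claim a one-cycle conditional move for any pair of registers. A real
  // target would check the register class of TrueReg/FalseReg and the
  // shape of Cond before agreeing, and report honest latencies.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;
  return true;
}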
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index a48c540..fee8e47 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -59,7 +59,7 @@ struct DomainValue {
// Pointer to the next DomainValue in a chain. When two DomainValues are
// merged, Victim.Next is set to point to Victor, so old DomainValue
- // references can be updated by folowing the chain.
+ // references can be updated by following the chain.
DomainValue *Next;
// Twiddleable instructions using or defining these registers.
@@ -666,7 +666,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
+ AI.isValid(); ++AI)
AliasMap[*AI] = i;
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 75ae5b9..4214ba1 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -155,7 +156,9 @@ namespace {
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
const MachineBranchProbabilityInfo *MBPI;
+ MachineRegisterInfo *MRI;
+ bool PreRegAlloc;
bool MadeChange;
int FnNum;
public:
@@ -263,14 +266,20 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
- // Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true, false);
- bool BFChange = BF.OptimizeFunction(MF, TII,
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+    // Tail merge tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ BFChange = BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
+ }
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
@@ -621,7 +630,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (BBI.IsDone)
return;
- bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ bool AlreadyPredicated = !BBI.Predicate.empty();
// First analyze the end of BB branches.
BBI.TrueBB = BBI.FalseBB = NULL;
BBI.BrCond.clear();
@@ -786,8 +795,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
unsigned Dups = 0;
unsigned Dups2 = 0;
- bool TNeedSub = TrueBBI.Predicate.size() > 0;
- bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
bool Enqueued = false;
BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
@@ -962,9 +971,8 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Redefs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- Redefs.insert(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
}
}
@@ -983,8 +991,8 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Defs.push_back(Reg);
else if (MO.isKill()) {
Redefs.erase(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Redefs.erase(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.erase(*SubRegs);
}
}
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
@@ -993,11 +1001,12 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
if (AddImpUse)
// Treat predicated update as read + write.
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
- true/*IsImp*/,false/*IsKill*/));
+ true/*IsImp*/,false/*IsKill*/,
+ false/*IsDead*/,true/*IsUndef*/));
} else {
Redefs.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Redefs.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
}
}
}
@@ -1335,8 +1344,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// These are defined before ctrl flow reach the 'false' instructions.
// They cannot be modified by the 'true' instructions.
ExtUses.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- ExtUses.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ExtUses.insert(*SubRegs);
}
}
@@ -1344,8 +1353,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
unsigned Reg = Defs[i];
if (!ExtUses.count(Reg)) {
RedefsByFalse.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- RedefsByFalse.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RedefsByFalse.insert(*SubRegs);
}
}
}
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index d5ea666..07e37af 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -52,7 +52,6 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
namespace {
class InlineSpiller : public Spiller {
- MachineFunctionPass &Pass;
MachineFunction &MF;
LiveIntervals &LIS;
LiveStacks &LSS;
@@ -137,8 +136,7 @@ public:
InlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm)
- : Pass(pass),
- MF(mf),
+ : MF(mf),
LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
AA(&pass.getAnalysis<AliasAnalysis>()),
@@ -578,11 +576,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true));
- assert(SrcLR && "Copy from non-existing value");
+ LiveRangeQuery SrcQ(SrcLI, VNI->def);
+ assert(SrcQ.valueIn() && "Copy from non-existing value");
// Check if this COPY kills its source.
- SVI->second.KillsSource = (SrcLR->end == VNI->def);
- VNInfo *SrcVNI = SrcLR->valno;
+ SVI->second.KillsSource = SrcQ.isKill();
+ VNInfo *SrcVNI = SrcQ.valueIn();
DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
<< SrcVNI->id << '@' << SrcVNI->def
<< " kill=" << unsigned(SVI->second.KillsSource) << '\n');
@@ -1083,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+  // Some (out-of-tree) targets have early-clobber (EC) reload instructions.
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
+ if (MO->isEarlyClobber())
+ LoadIdx = LoadIdx.getRegSlot(true);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
@@ -1275,8 +1277,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
DEBUG(dbgs() << "Inline spilling "
<< MRI.getRegClass(edit.getReg())->getName()
- << ':' << edit.getParent() << "\nFrom original "
- << LIS.getInterval(Original) << '\n');
+ << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent()
+ << "\nFrom original " << LIS.getInterval(Original) << '\n');
assert(edit.getParent().isSpillable() &&
"Attempting to spill already spilled value.");
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
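traceSiblingValue now asks a LiveRangeQuery for the value flowing into the copy and whether the copy kills its source, rather than fishing out the LiveRange by slot index. A minimal sketch of the query, assuming the LiveRangeQuery class declared alongside LiveInterval at this revision:

static void inspectUse(const LiveInterval &LI, SlotIndex UseIdx) {
  LiveRangeQuery Q(LI, UseIdx);
  VNInfo *LiveIn = Q.valueIn(); // value live-in to the instruction, or 0
  bool KillsIt = Q.isKill();    // true if the live range ends here
  (void)LiveIn; (void)KillsIt;
}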
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 8368b58..1541bf0 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -39,7 +39,7 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
unsigned E = PhysRegEntries[PhysReg];
if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
if (!Entries[E].valid(LIUArray, TRI))
- Entries[E].revalidate();
+ Entries[E].revalidate(LIUArray, TRI);
return &Entries[E];
}
// No valid entry exists, pick the next round-robin entry.
@@ -61,13 +61,15 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
}
/// revalidate - LIU contents have changed, update tags.
-void InterferenceCache::Entry::revalidate() {
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
// Invalidate all block entries.
++Tag;
// Invalidate all iterators.
PrevPos = SlotIndex();
- for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
- Aliases[i].second = Aliases[i].first->getTag();
+ unsigned i = 0;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
+ RegUnits[i].VirtTag = LIUArray[*Units].getTag();
}
void InterferenceCache::Entry::reset(unsigned physReg,
@@ -79,28 +81,23 @@ void InterferenceCache::Entry::reset(unsigned physReg,
++Tag;
PhysReg = physReg;
Blocks.resize(MF->getNumBlockIDs());
- Aliases.clear();
- for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
- LiveIntervalUnion *LIU = LIUArray + *AS;
- Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
- }
// Reset iterators.
PrevPos = SlotIndex();
- unsigned e = Aliases.size();
- Iters.resize(e);
- for (unsigned i = 0; i != e; ++i)
- Iters[i].setMap(Aliases[i].first->getMap());
+ RegUnits.clear();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ RegUnits.push_back(LIUArray[*Units]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ }
}
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
- unsigned i = 0, e = Aliases.size();
- for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
- LiveIntervalUnion *LIU = LIUArray + *AS;
- if (i == e || Aliases[i].first != LIU)
+ unsigned i = 0, e = RegUnits.size();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ if (i == e)
return false;
- if (LIU->changedSince(Aliases[i].second))
+ if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
return false;
}
return i == e;
@@ -112,12 +109,20 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
// Use advanceTo only when possible.
if (PrevPos != Start) {
- if (!PrevPos.isValid() || Start < PrevPos)
- for (unsigned i = 0, e = Iters.size(); i != e; ++i)
- Iters[i].find(Start);
- else
- for (unsigned i = 0, e = Iters.size(); i != e; ++i)
- Iters[i].advanceTo(Start);
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
PrevPos = Start;
}
@@ -129,9 +134,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
- // Check for first interference.
- for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
- Iter &I = Iters[i];
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
if (!I.valid())
continue;
SlotIndex StartI = I.start();
@@ -141,6 +146,19 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = StartI;
}
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
// Also check for register mask interference.
RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
@@ -168,8 +186,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
}
// Check for last interference in block.
- for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
- Iter &I = Iters[i];
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
if (!I.valid() || I.start() >= Stop)
continue;
I.advanceTo(Stop);
@@ -183,6 +201,23 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
++I;
}
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveInterval *LI = RegUnits[i].Fixed;
+ if (I == LI->end() || I->start >= Stop)
+ continue;
+ I = LI->advanceTo(I, Stop);
+ bool Backup = I == LI->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
// Also check for register mask interference.
SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
for (unsigned i = RegMaskSlots.size();
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 485a325..3c928a5 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
//
//===----------------------------------------------------------------------===//
@@ -59,14 +60,31 @@ class InterferenceCache {
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
- /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of
- /// PhysReg.
- SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
- typedef LiveIntervalUnion::SegmentIter Iter;
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
- /// Iters - an iterator for each alias
- SmallVector<Iter, 8> Iters;
+ /// Fixed interference in RegUnit.
+ LiveInterval *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+  /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
/// Blocks - Interference for each block in the function.
SmallVector<BlockInterference, 8> Blocks;
@@ -91,7 +109,7 @@ class InterferenceCache {
bool hasRefs() const { return RefCount > 0; }
- void revalidate();
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
/// valid - Return true if this is a valid entry for physReg.
bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
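Each cache entry now tracks two interference sources per register unit instead of one LiveIntervalUnion per alias: the union of assigned virtual registers, and the fixed LiveInterval holding physreg liveness. A sketch of the walk, using the LIS->getRegUnit accessor seen in the reset() hunk above:

static void walkRegUnits(unsigned PhysReg, const TargetRegisterInfo *TRI,
                         LiveIntervalUnion *LIUArray, LiveIntervals *LIS) {
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion &Virt = LIUArray[*Units];    // assigned virtregs
    LiveInterval &Fixed = LIS->getRegUnit(*Units); // fixed physreg liveness
    (void)Virt; (void)Fixed;
  }
}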
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a9ca42f..8d2282a 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
template <class ArgIt>
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a1f479a..cac0c83 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -78,40 +79,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
-/// Turn exception handling constructs into something the code generators can
-/// handle.
-static void addPassesToHandleExceptions(TargetMachine *TM,
- PassManagerBase &PM) {
- switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::SjLj:
- // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
- // catch info can get misplaced when a selector ends up more than one block
- // removed from the parent invoke(s). This could happen when a landing
- // pad is shared by multiple invokes and is also a target of a normal
- // edge from elsewhere.
- PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
- // FALLTHROUGH
- case ExceptionHandling::DwarfCFI:
- case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
- PM.add(createDwarfEHPass(TM));
- break;
- case ExceptionHandling::None:
- PM.add(createLowerInvokePass(TM->getTargetLowering()));
-
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
- break;
- }
-}
-
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassManagerBase &PM,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
// Targets may override createPassConfig to provide a target-specific subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartAfter, StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
@@ -120,7 +96,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassConfig->addIRPasses();
- addPassesToHandleExceptions(TM, PM);
+ PassConfig->addPassesToHandleExceptions();
PassConfig->addISelPrepare();
@@ -155,16 +131,30 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+ StartAfter, StopAfter);
if (!Context)
return true;
+ if (StopAfter) {
+ // FIXME: The intent is that this should eventually write out a YAML file,
+ // containing the LLVM IR, the machine-level IR (when stopping after a
+ // machine-level pass), and whatever other information is needed to
+ // deserialize the code and resume compilation. For now, just write the
+ // LLVM IR.
+ PM.add(createPrintModulePass(&Out));
+ return false;
+ }
+
if (hasMCSaveTempLabels())
Context->setAllowTemporaryLabels(false);
const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
OwningPtr<MCStreamer> AsmStreamer;
@@ -180,7 +170,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
MCAsmBackend *MAB = 0;
if (ShowMCEncoding) {
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context);
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+ *Context);
MAB = getTarget().createMCAsmBackend(getTargetTriple());
}
@@ -198,8 +189,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI,
- *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
@@ -242,7 +233,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
if (!Context)
return true;
@@ -262,7 +253,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
raw_ostream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify);
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
if (!Ctx)
return true;
@@ -271,9 +262,10 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
+ const MCRegisterInfo &MRI = *getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI,
- *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
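
A quick sketch of how a driver could exercise the new StartAfter/StopAfter
parameters. This is illustrative only: the pass-name lookup and the variable
names (TM, FOS, M, "machine-scheduler") are assumptions, and only the extended
addPassesToEmitFile() signature comes from this patch.

    // Resolve a pass name to the AnalysisID expected by the new parameters.
    const PassInfo *PI = PassRegistry::getPassRegistry()
                             ->getPassInfo(StringRef("machine-scheduler"));
    AnalysisID StopAfterID = PI ? PI->getTypeInfo() : 0;

    PassManager PM;
    if (TM->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
                                /*DisableVerify=*/true, /*StartAfter=*/0,
                                StopAfterID))
      report_fatal_error("target does not support generation of this file type");
    // With StopAfterID set, the StopAfter branch above prints LLVM IR
    // instead of running the remaining codegen passes.
    PM.run(*M);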
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index f1abcbb..6b6b9d0 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -16,8 +16,8 @@
#define DEBUG_TYPE "lexicalscopes"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 2187833..d631726 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -23,9 +23,9 @@
#include "LiveDebugVariables.h"
#include "VirtRegMap.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Metadata.h"
#include "llvm/Value.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -243,7 +243,7 @@ public:
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
- void computeIntervals(MachineRegisterInfo &MRI,
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS);
@@ -618,6 +618,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
UserValueScopes &UVS) {
@@ -634,15 +635,32 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
unsigned LocNo = Defs[i].second;
const MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg()) {
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ continue;
+ }
+
// Register locations are constrained to where the register value is live.
- if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
- LiveInterval *LI = &LIS.getInterval(Loc.getReg());
- const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ LiveInterval *LI = 0;
+ const VNInfo *VNI = 0;
+ if (LIS.hasInterval(Loc.getReg())) {
+ LI = &LIS.getInterval(Loc.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
SmallVector<SlotIndex, 16> Kills;
extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
- addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
- } else
- extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ if (LI)
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ continue;
+ }
+
+ // For physregs, use the live range of the first regunit as a guide.
+ unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
+ LiveInterval *LI = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ // Don't track copies from physregs; it is too expensive.
+ extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS);
}
// Finally, erase all the undefs.
@@ -656,7 +674,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LDVImpl::computeIntervals() {
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
- userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -721,7 +739,8 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
if (TargetRegisterInfo::isVirtualRegister(NewReg))
mapVirtReg(NewReg, UV);
- virtRegToEqClass.erase(OldReg);
+ if (OldReg != NewReg)
+ virtRegToEqClass.erase(OldReg);
do {
UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ac18843..01077db 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -48,6 +48,26 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
return I;
}
+VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+ iterator I = find(Def);
+ if (I == end()) {
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+ if (SlotIndex::isSameInstr(Def, I->start)) {
+ assert(I->start == Def && "Cannot insert def, already live");
+ assert(I->valno->def == Def && "Inconsistent existing value def");
+ return I->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+}
+
/// killedInRange - Return true if the interval has kills in [Start,End).
bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
Ranges::const_iterator r =
@@ -176,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
// If NewEnd was in the middle of an interval, make sure to get its endpoint.
I->end = std::max(NewEnd, prior(MergeTo)->end);
- // Erase any dead ranges.
- ranges.erase(llvm::next(I), MergeTo);
-
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
- Ranges::iterator Next = llvm::next(I);
- if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
- I->end = Next->end;
- ranges.erase(Next);
+ if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ I->end = MergeTo->end;
+ ++MergeTo;
}
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
}
@@ -353,18 +373,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
markValNoForDeletion(ValNo);
}
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// index (register interval).
-VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
- for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i) {
- if ((*i)->def == Idx)
- return *i;
- }
-
- return 0;
-}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
@@ -373,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other,
const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
+ verify();
+
// Determine if any of our live range values are mapped. This is uncommon, so
// we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
@@ -440,16 +450,148 @@ void LiveInterval::join(LiveInterval &Other,
valnos.resize(NumNewVals); // shrinkify
// Okay, now insert the RHS live ranges into the LHS.
- iterator InsertPos = begin();
unsigned RangeNo = 0;
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
// Map the valno in the other live range to the current live range.
I->valno = NewVNInfo[OtherAssignments[RangeNo]];
assert(I->valno && "Adding a dead range?");
- InsertPos = addRangeFrom(*I, InsertPos);
+ }
+ mergeIntervalRanges(Other);
+
+ verify();
+}
+
+/// \brief Helper function for merging in another LiveInterval's ranges.
+///
+/// This is a helper routine implementing an efficient merge of another
+/// LiveInterval's ranges into the current interval.
+///
+/// \param LHSValNo If non-NULL, set as the new value number for every range
+/// from RHS which is merged into the LHS.
+/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
+/// number matches this value number will be merged into LHS.
+void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
+ VNInfo *LHSValNo,
+ const VNInfo *RHSValNo) {
+ if (RHS.empty())
+ return;
+
+ // Ensure we're starting with a valid set of ranges. Note that we don't
+ // verify RHS because it may have had its value numbers adjusted in
+ // preparation for merging.
+ verify();
+
+ // The strategy for merging these efficiently is as follows:
+ //
+ // 1) Find the beginning of the impacted ranges in the LHS.
+ // 2) Create a new, merged subsequence of ranges, merging from the position
+ //    in #1 until either LHS or RHS is exhausted. Any part of LHS between
+ //    RHS entries being merged will be copied into this new range.
+ // 3) Replace the relevant section in LHS with these newly merged ranges.
+ // 4) Append any remaining ranges from RHS if LHS is exhausted in #2.
+ //
+ // We don't follow the typical in-place merge strategy for sorted ranges of
+ // appending the new ranges to the back and then using std::inplace_merge
+ // because one step of the merge can both mutate the original elements and
+ // remove elements from the original. Essentially, because the merge includes
+ // collapsing overlapping ranges, a more complex approach is required.
+
+ // We do an initial binary search to optimize for a common pattern: a large
+ // LHS, and a very small RHS.
+ const_iterator RI = RHS.begin(), RE = RHS.end();
+ iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);
+
+ // Merge into NewRanges until one of the ranges is exhausted.
+ SmallVector<LiveRange, 4> NewRanges;
+
+ // Keep track of where to begin the replacement.
+ iterator ReplaceI = LI;
+
+ // If there are preceding ranges in the LHS, put the last one into NewRanges
+ // so we can optionally extend it. Adjust the replacement point accordingly.
+ if (LI != begin()) {
+ ReplaceI = llvm::prior(LI);
+ NewRanges.push_back(*ReplaceI);
+ }
+
+ // Now loop over the mergeable portions of both LHS and RHS, merging into
+ // NewRanges.
+ while (LI != LE && RI != RE) {
+ // Skip incoming ranges with the wrong value.
+ if (RHSValNo && RI->valno != RHSValNo) {
+ ++RI;
+ continue;
+ }
+
+ // Select the first range. We pick the earliest start point, and then the
+ // largest range.
+ LiveRange R = *LI;
+ if (*RI < R) {
+ R = *RI;
+ ++RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ } else {
+ ++LI;
+ }
+
+ if (NewRanges.empty()) {
+ NewRanges.push_back(R);
+ continue;
+ }
+
+ LiveRange &LastR = NewRanges.back();
+ if (R.valno == LastR.valno) {
+ // Try to merge this range into the last one.
+ if (R.start <= LastR.end) {
+ LastR.end = std::max(LastR.end, R.end);
+ continue;
+ }
+ } else {
+ // We can't merge ranges across a value number.
+ assert(R.start >= LastR.end &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+
+ // If all else fails, just append the range.
+ NewRanges.push_back(R);
+ }
+ assert(RI == RE || LI == LE);
+
+ // Check for being able to merge into the trailing sequence of ranges on
+ // the LHS.
+ if (!NewRanges.empty())
+ for (; LI != LE && (LI->valno == NewRanges.back().valno &&
+ LI->start <= NewRanges.back().end);
+ ++LI)
+ NewRanges.back().end = std::max(NewRanges.back().end, LI->end);
+
+ // Replace the ranges in the LHS with the newly merged ones. It would be
+ // really nice if there were a move-supporting 'replace' directly in
+ // SmallVector, but as there is not, we pay the price of copies to avoid
+ // wasted memory allocations.
+ SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
+ NRE = NewRanges.end();
+ for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
+ *ReplaceI = *NRI;
+ if (NRI == NRE)
+ ranges.erase(ReplaceI, LI);
+ else
+ ranges.insert(LI, NRI, NRE);
+
+ // And finally insert any trailing end of RHS (if we have one).
+ for (; RI != RE; ++RI) {
+ LiveRange R = *RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ if (!ranges.empty() &&
+ ranges.back().valno == R.valno && R.start <= ranges.back().end)
+ ranges.back().end = std::max(ranges.back().end, R.end);
+ else
+ ranges.push_back(R);
}
- ComputeJoinedWeight(Other);
+ // Ensure we finished with a valid new sequence of ranges.
+ verify();
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -458,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other,
/// the overlapping LiveRanges have the specified value number.
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+ mergeIntervalRanges(RHS, LHSValNo);
}
-
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
/// in RHS into this live interval as the specified value number.
/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
/// current interval; the value numbers of the overlapped live ranges are
/// replaced with the specified value number.
-void LiveInterval::MergeValueInAsValue(
- const LiveInterval &RHS,
- const VNInfo *RHSValNo, VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- if (I->valno != RHSValNo)
- continue;
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
}
-
/// MergeValueNumberInto - This method is called when two value numbers
/// are found to be equivalent. This eliminates V1, replacing all
/// LiveRanges with the V1 value number with the V2 value number. This can
@@ -569,6 +693,8 @@ void LiveInterval::Copy(const LiveInterval &RHS,
const LiveRange &LR = RHS.ranges[i];
addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
}
+
+ verify();
}
unsigned LiveInterval::getSize() const {
@@ -578,29 +704,6 @@ unsigned LiveInterval::getSize() const {
return Sum;
}
-/// ComputeJoinedWeight - Set the weight of a live interval Joined
-/// after Other has been merged into it.
-void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
- // If either of these intervals was spilled, the weight is the
- // weight of the non-spilled interval. This can only happen with
- // iterative coalescers.
-
- if (Other.weight != HUGE_VALF) {
- weight += Other.weight;
- }
- else if (weight == HUGE_VALF &&
- !TargetRegisterInfo::isPhysicalRegister(reg)) {
- // Remove this assert if you have an iterative coalescer
- assert(0 && "Joining to spilled interval");
- weight = Other.weight;
- }
- else {
- // Otherwise the weight stays the same
- // Remove this assert if you have an iterative coalescer
- assert(0 && "Joining from spilled interval");
- }
-}
-
raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}
@@ -609,15 +712,10 @@ void LiveRange::dump() const {
dbgs() << *this << "\n";
}
-void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- OS << PrintReg(reg, TRI);
- if (weight != 0)
- OS << ',' << weight;
-
+void LiveInterval::print(raw_ostream &OS) const {
if (empty())
- OS << " EMPTY";
+ OS << "EMPTY";
else {
- OS << " = ";
for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
E = ranges.end(); I != E; ++I) {
OS << *I;
@@ -651,6 +749,23 @@ void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
+#ifndef NDEBUG
+void LiveInterval::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != 0);
+ assert(I->valno == valnos[I->valno->id]);
+ if (llvm::next(I) != E) {
+ assert(I->end <= llvm::next(I)->start);
+ if (I->end == llvm::next(I)->start)
+ assert(I->valno != llvm::next(I)->valno);
+ }
+ }
+}
+#endif
+
void LiveRange::print(raw_ostream &os) const {
os << *this;
@@ -718,7 +833,10 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
SlotIndex Idx = LIS.getInstructionIndex(MI);
Idx = Idx.getRegSlot(MO.isUse());
const VNInfo *VNI = LI.getVNInfoAt(Idx);
- assert(VNI && "Interval not live at use.");
+ // FIXME: We should be able to assert(VNI) here, but the coalescer leaves
+ // dangling defs around.
+ if (!VNI)
+ continue;
MO.setReg(LIV[getEqClass(VNI)]->reg);
}
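
A worked example of the merge semantics, hedged as an illustration of the
intended behavior rather than code from this patch (the interval contents are
made up):

    // Suppose LHS = [0,10:0),[20,30:0) and RHS = [8,25:0), where all three
    // ranges carry the same value number after remapping.
    LHS.MergeRangesInAsValue(RHS, LHSValNo);
    // mergeIntervalRanges coalesces the overlapping pieces: LHS = [0,30:0).
    // Ranges with differing value numbers are never coalesced; overlapping
    // them would trip the "Cannot overlap two LiveRanges" assertion above.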
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 934cc12..819707f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,30 +20,24 @@
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "LiveRangeCalc.h"
#include <algorithm>
#include <limits>
#include <cmath>
using namespace llvm;
-// Hidden options for help debugging.
-static cl::opt<bool> DisableReMat("disable-rematerialization",
- cl::init(false), cl::Hidden);
-
-STATISTIC(numIntervals , "Number of original intervals");
-
char LiveIntervals::ID = 0;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
@@ -61,23 +55,35 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
AU.addPreserved<SlotIndexes>();
AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+ DomTree(0), LRCalc(0) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+ delete LRCalc;
+}
+
void LiveIntervals::releaseMemory() {
// Free the live intervals themselves.
- for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
- E = r2iMap_.end(); I != E; ++I)
- delete I->second;
-
- r2iMap_.clear();
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ VirtRegIntervals.clear();
RegMaskSlots.clear();
RegMaskBits.clear();
RegMaskBlocks.clear();
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ delete RegUnitIntervals[i];
+ RegUnitIntervals.clear();
+
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
@@ -85,20 +91,22 @@ void LiveIntervals::releaseMemory() {
/// runOnMachineFunction - Compute live intervals for the whole function.
///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &mf_->getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- aa_ = &getAnalysis<AliasAnalysis>();
- lv_ = &getAnalysis<LiveVariables>();
- indexes_ = &getAnalysis<SlotIndexes>();
- allocatableRegs_ = tri_->getAllocatableSet(fn);
- reservedRegs_ = tri_->getReservedRegs(fn);
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LV = &getAnalysis<LiveVariables>();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ if (!LRCalc)
+ LRCalc = new LiveRangeCalc();
+ AllocatableRegs = TRI->getAllocatableSet(fn);
+ ReservedRegs = TRI->getReservedRegs(fn);
computeIntervals();
-
- numIntervals += getNumIntervals();
+ computeLiveInRegUnits();
DEBUG(dump());
return true;
@@ -108,27 +116,24 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
- // Dump the physregs.
- for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
- if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
- LI->print(OS, tri_);
- OS << '\n';
- }
+ // Dump the regunits.
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ if (LiveInterval *LI = RegUnitIntervals[i])
+ OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
// Dump the virtregs.
- for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
- if (const LiveInterval *LI =
- r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
- LI->print(OS, tri_);
- OS << '\n';
- }
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+ }
printInstrs(OS);
}
void LiveIntervals::printInstrs(raw_ostream &OS) const {
OS << "********** MACHINEINSTRS **********\n";
- mf_->print(OS, indexes_);
+ MF->print(OS, Indexes);
}
void LiveIntervals::dumpInstrs() const {
@@ -176,13 +181,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI));
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
// done once for the vreg. We use an empty interval to detect the first
// time we see a vreg.
- LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
@@ -226,11 +231,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << " +" << NewLR);
interval.addRange(NewLR);
- bool PHIJoin = lv_->isPHIJoin(interval.reg);
+ bool PHIJoin = LV->isPHIJoin(interval.reg);
if (PHIJoin) {
- // A phi join register is killed at the end of the MBB and revived as a new
- // valno in the killing blocks.
+ // A phi join register is killed at the end of the MBB and revived as a
+ // new valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
ValNo->setHasPHIKill(true);
@@ -240,8 +245,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live interval.
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
+ ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR);
}
@@ -319,11 +325,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
OldValNo));
- DEBUG({
- dbgs() << " RESULT: ";
- interval.print(dbgs(), tri_);
- });
- } else if (lv_->isPHIJoin(interval.reg)) {
+ DEBUG(dbgs() << " RESULT: " << interval);
+ } else if (LV->isPHIJoin(interval.reg)) {
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
@@ -347,101 +350,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << '\n');
}
-static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) {
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI) {
- const MachineBasicBlock* succ = *SI;
- if (succ->isLiveIn(Reg))
- return true;
- }
- return false;
-}
-
-void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator mi,
- SlotIndex MIIdx,
- MachineOperand& MO,
- LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
-
- SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber());
- SlotIndex end = start;
-
- // If it is not used after definition, it is considered dead at
- // the instruction defining it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- // For earlyclobbers, the defSlot was pushed back one; the extra
- // advance below compensates.
- if (MO.isDead()) {
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- goto exit;
- }
-
- // If it is not dead on definition, it must be killed by a
- // subsequent instruction. Hence its interval is:
- // [defSlot(def), useSlot(kill)+1)
- baseIndex = baseIndex.getNextIndex();
- while (++mi != MBB->end()) {
-
- if (mi->isDebugValue())
- continue;
- if (getInstructionFromIndex(baseIndex) == 0)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-
- if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(dbgs() << " killed");
- end = baseIndex.getRegSlot();
- goto exit;
- } else {
- int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
- if (DefIdx != -1) {
- if (mi->isRegTiedToUseOperand(DefIdx)) {
- // Two-address instruction.
- end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber());
- } else {
- // Another instruction redefines the register before it is ever read.
- // Then the register is essentially dead at the instruction that
- // defines it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- }
- goto exit;
- }
- }
-
- baseIndex = baseIndex.getNextIndex();
- }
-
- // If we get here the register *should* be live out.
- assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!");
-
- // FIXME: We need saner rules for reserved regs.
- if (isReserved(interval.reg)) {
- end = start.getDeadSlot();
- } else {
- // Unreserved, unallocable registers like EFLAGS can be live across basic
- // block boundaries.
- assert(isRegLiveIntoSuccessor(MBB, interval.reg) &&
- "Unreserved reg not live-out?");
- end = getMBBEndIdx(MBB);
- }
-exit:
- assert(start < end && "did not find end of interval?");
-
- // Already exists? Extend old live interval.
- VNInfo *ValNo = interval.getVNInfoAt(start);
- bool Extend = ValNo != 0;
- if (!Extend)
- ValNo = interval.getNextValue(start, VNInfoAllocator);
- LiveRange LR(start, end, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << '\n');
-}
-
void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
SlotIndex MIIdx,
@@ -450,93 +358,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else
- handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(MO.getReg()));
-}
-
-void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
- SlotIndex MIIdx,
- LiveInterval &interval) {
- assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) &&
- "Only physical registers can be live in.");
- assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() ||
- MBB->isLandingPad()) &&
- "Allocatable live-ins only valid for entry blocks and landing pads.");
-
- DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
-
- // Look for kills, if it reaches a def before it's killed, then it shouldn't
- // be considered a livein.
- MachineBasicBlock::iterator mi = MBB->begin();
- MachineBasicBlock::iterator E = MBB->end();
- // Skip over DBG_VALUE at the start of the MBB.
- if (mi != E && mi->isDebugValue()) {
- while (++mi != E && mi->isDebugValue())
- ;
- if (mi == E)
- // MBB is empty except for DBG_VALUE's.
- return;
- }
-
- SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex;
- if (getInstructionFromIndex(baseIndex) == 0)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-
- SlotIndex end = baseIndex;
- bool SeenDefUse = false;
-
- while (mi != E) {
- if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(dbgs() << " killed");
- end = baseIndex.getRegSlot();
- SeenDefUse = true;
- break;
- } else if (mi->modifiesRegister(interval.reg, tri_)) {
- // Another instruction redefines the register before it is ever read.
- // Then the register is essentially dead at the instruction that defines
- // it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- SeenDefUse = true;
- break;
- }
-
- while (++mi != E && mi->isDebugValue())
- // Skip over DBG_VALUE.
- ;
- if (mi != E)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
- }
-
- // Live-in register might not be used at all.
- if (!SeenDefUse) {
- if (isAllocatable(interval.reg) ||
- !isRegLiveIntoSuccessor(MBB, interval.reg)) {
- // Allocatable registers are never live through.
- // Non-allocatable registers that aren't live into any successors also
- // aren't live through.
- DEBUG(dbgs() << " dead");
- return;
- } else {
- // If we get here the register is non-allocatable and live into some
- // successor. We'll conservatively assume it's live-through.
- DEBUG(dbgs() << " live through");
- end = getMBBEndIdx(MBB);
- }
- }
-
- SlotIndex defIdx = getMBBStartIdx(MBB);
- assert(getInstructionFromIndex(defIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator);
- vni->setIsPHIDef(true);
- LiveRange LR(start, end, vni);
-
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << '\n');
}
/// computeIntervals - computes the live intervals for virtual
@@ -546,12 +367,12 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n');
+ << ((Value*)MF->getFunction())->getName() << '\n');
- RegMaskBlocks.resize(mf_->getNumBlockIDs());
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
SmallVector<unsigned, 8> UndefUses;
- for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
@@ -564,22 +385,16 @@ void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "BB#" << MBB->getNumber()
<< ":\t\t# derived from " << MBB->getName() << "\n");
- // Create intervals for live-ins to this BB first.
- for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
- LE = MBB->livein_end(); LI != LE; ++LI) {
- handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
- }
-
// Skip over empty initial indices.
if (getInstructionFromIndex(MIIndex) == 0)
- MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ MIIndex = Indexes->getNextNonNullIndex(MIIndex);
for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
MI != miEnd; ++MI) {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
if (MI->isDebugValue())
continue;
- assert(indexes_->getInstructionFromIndex(MIIndex) == MI &&
+ assert(Indexes->getInstructionFromIndex(MIIndex) == MI &&
"Lost SlotIndex synchronization");
// Handle defs.
@@ -593,7 +408,7 @@ void LiveIntervals::computeIntervals() {
continue;
}
- if (!MO.isReg() || !MO.getReg())
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
// handle register defs - build intervals
@@ -604,7 +419,7 @@ void LiveIntervals::computeIntervals() {
}
// Move to the next instr slot.
- MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ MIIndex = Indexes->getNextNonNullIndex(MIIndex);
}
// Compute the number of register mask instructions in this block.
@@ -626,14 +441,104 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
return new LiveInterval(reg, Weight);
}
-/// dupInterval - Duplicate a live interval. The caller is responsible for
-/// managing the allocated memory.
-LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
- LiveInterval *NewLI = createInterval(li->reg);
- NewLI->Copy(*li, mri_, getVNInfoAllocator());
- return NewLI;
+
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
+
+/// computeRegUnitInterval - Compute the live interval of a register unit, based
+/// on the uses and defs of aliasing registers. The interval should be empty,
+/// or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
+ unsigned Unit = LI->reg;
+
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->reg_empty(Root))
+ LRCalc->createDeadDefs(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ if (!MRI->reg_empty(*Supers))
+ LRCalc->createDeadDefs(LI, *Supers);
+ }
+ }
+
+ // Now extend LI to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!isReserved(Root) && !MRI->reg_empty(Root))
+ LRCalc->extendToUses(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ unsigned Reg = *Supers;
+ if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LI, Reg);
+ }
+ }
+}
+
+
+/// computeLiveInRegUnits - Precompute the live ranges of any register units
+/// that are live-in to an ABI block somewhere. Register values can appear
+/// without a corresponding def when entering the entry block or a landing pad.
+///
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitIntervals.resize(TRI->getNumRegUnits());
+ DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the intervals allocated.
+ SmallVector<LiveInterval*, 8> NewIntvs;
+
+ // Check all basic blocks for live-ins.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock *MBB = MFI;
+
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
+ for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
+ LIE = MBB->livein_end(); LII != LIE; ++LII) {
+ for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = *Units;
+ LiveInterval *Intv = RegUnitIntervals[Unit];
+ if (!Intv) {
+ Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF);
+ NewIntvs.push_back(Intv);
+ }
+ VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the intervals.
+ for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i)
+ computeRegUnitInterval(NewIntvs[i]);
}
+
/// shrinkToUses - After removing some uses of a register, shrink its live
/// range to just the remaining uses. This method does not compute reaching
/// defs for new uses, and it doesn't remove dead defs.
@@ -649,14 +554,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
// Visit all instructions reading li->reg.
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg);
MachineInstr *UseMI = I.skipInstruction();) {
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
- // Note: This intentionally picks up the wrong VNI in case of an EC redef.
- // See below.
- VNInfo *VNI = li->getVNInfoBefore(Idx);
+ LiveRangeQuery LRQ(*li, Idx);
+ VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
@@ -667,13 +571,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
}
// Special case: An early-clobber tied operand reads and writes the
- // register one slot early. The getVNInfoBefore call above would have
- // picked up the value defined by UseMI. Adjust the kill slot and value.
- if (SlotIndex::isSameInstr(VNI->def, Idx)) {
- Idx = VNI->def;
- VNI = li->getVNInfoBefore(Idx);
- assert(VNI && "Early-clobber tied value not available");
- }
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
WorkList.push_back(std::make_pair(Idx, VNI));
}
@@ -755,7 +656,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(VNI->def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(li->reg, tri_);
+ MI->addRegisterDead(li->reg, TRI);
if (dead && MI->allDefsAreDead()) {
DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
dead->push_back(MI);
@@ -775,13 +676,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
//
void LiveIntervals::addKillFlags() {
- for (iterator I = begin(), E = end(); I != E; ++I) {
- unsigned Reg = I->first;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- continue;
- if (mri_->reg_nodbg_empty(Reg))
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
continue;
- LiveInterval *LI = I->second;
+ LiveInterval *LI = &getInterval(Reg);
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
@@ -797,101 +696,6 @@ void LiveIntervals::addKillFlags() {
}
}
-/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
-/// allow one) virtual register operand, then its uses are implicitly using
-/// the register. Returns the virtual register.
-unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
- MachineInstr *MI) const {
- unsigned RegOp = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0 || Reg == li.reg)
- continue;
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg))
- continue;
- RegOp = MO.getReg();
- break; // Found vreg operand - leave the loop.
- }
- return RegOp;
-}
-
-/// isValNoAvailableAt - Return true if the val# of the specified interval
-/// which reaches the given instruction also reaches the specified use index.
-bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- SlotIndex UseIdx) const {
- VNInfo *UValNo = li.getVNInfoAt(UseIdx);
- return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
-}
-
-/// isReMaterializable - Returns true if the definition MI of the specified
-/// val# of the specified interval is re-materializable.
-bool
-LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- bool &isLoad) {
- if (DisableReMat)
- return false;
-
- if (!tii_->isTriviallyReMaterializable(MI, aa_))
- return false;
-
- // Target-specific code can mark an instruction as being rematerializable
- // if it has one virtual reg use, though it had better be something like
- // a PIC base register which is likely to be live everywhere.
- unsigned ImpUse = getReMatImplicitUse(li, MI);
- if (ImpUse) {
- const LiveInterval &ImpLi = getInterval(ImpUse);
- for (MachineRegisterInfo::use_nodbg_iterator
- ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
- ri != re; ++ri) {
- MachineInstr *UseMI = &*ri;
- SlotIndex UseIdx = getInstructionIndex(UseMI);
- if (li.getVNInfoAt(UseIdx) != ValNo)
- continue;
- if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
- return false;
- }
-
- // If a register operand of the re-materialized instruction is going to
- // be spilled next, then it's not legal to re-materialize this instruction.
- if (SpillIs)
- for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
- if (ImpUse == (*SpillIs)[i]->reg)
- return false;
- }
- return true;
-}
-
-/// isReMaterializable - Returns true if every definition of MI of every
-/// val# of the specified interval is re-materializable.
-bool
-LiveIntervals::isReMaterializable(const LiveInterval &li,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- bool &isLoad) {
- isLoad = false;
- for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
- i != e; ++i) {
- const VNInfo *VNI = *i;
- if (VNI->isUnused())
- continue; // Dead val#.
- // Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
- if (!ReMatDefMI)
- return false;
- bool DefIsLoad = false;
- if (!ReMatDefMI ||
- !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
- return false;
- isLoad |= DefIsLoad;
- }
- return true;
-}
-
MachineBasicBlock*
LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
// A local live range must be fully contained inside the block, meaning it is
@@ -911,8 +715,8 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
// getMBBFromIndex doesn't need to search the MBB table when both indexes
// belong to proper instructions.
- MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
- MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
return MBB1 == MBB2 ? MBB1 : NULL;
}
@@ -990,7 +794,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
if (!Found) {
// This is the first overlap. Initialize UsableRegs to all ones.
UsableRegs.clear();
- UsableRegs.resize(tri_->getNumRegs(), true);
+ UsableRegs.resize(TRI->getNumRegs(), true);
Found = true;
}
// Remove usable registers clobbered by this mask.
@@ -1101,6 +905,9 @@ public:
BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
+ Entering.clear();
+ Internal.clear();
+ Exiting.clear();
collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
@@ -1176,78 +983,44 @@ private:
// TODO: Currently we're skipping uses that are reserved or have no
// interval, but we're not updating their kills. This should be
// fixed.
- if (!LIS.hasInterval(Reg) ||
- (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))
continue;
- LiveInterval* LI = &LIS.getInterval(Reg);
-
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- if (MO.isEarlyClobber()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true));
- assert(LR != 0 && "No EC range?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
- else
- Internal.insert(std::make_pair(LI, LR));
- } else if (MO.isDead()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No dead-def range?");
- Internal.insert(std::make_pair(LI, LR));
- } else {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot());
- assert(LR && LR->end > OldIdx.getDeadSlot() &&
- "Non-dead-def should have live range exiting.");
- Exiting.insert(std::make_pair(LI, LR));
- }
+ // Collect ranges for register units. These live ranges are computed on
+ // demand, so just skip any that haven't been computed yet.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
+ collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx);
+ } else {
+ // Collect ranges for individual virtual registers.
+ collectRanges(MO, &LIS.getInterval(Reg),
+ Entering, Internal, Exiting, OldIdx);
}
}
}
- // Collect IntRangePairs for all operands of MI that may need fixing.
- void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering,
- RangeSet& Exiting, SlotIndex MIStartIdx,
- SlotIndex MIEndIdx) {
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
- assert(!MO.isRegMask() && "Can't have RegMasks in bundles.");
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
-
- unsigned Reg = MO.getReg();
-
- // TODO: Currently we're skipping uses that are reserved or have no
- // interval, but we're not updating their kills. This should be
- // fixed.
- if (!LIS.hasInterval(Reg) ||
- (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
- continue;
-
- LiveInterval* LI = &LIS.getInterval(Reg);
-
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles.");
- assert(!MO.isDead() && "Dead-defs not allowed in bundles.");
- LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot());
- assert(LR != 0 && "Internal ranges not allowed in bundles.");
+ void collectRanges(const MachineOperand &MO, LiveInterval *LI,
+ RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
+ SlotIndex OldIdx) {
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
+ assert(LR != 0 && "No live range for def?");
+ if (LR->end > OldIdx.getDeadSlot())
Exiting.insert(std::make_pair(LI, LR));
- }
+ else
+ Internal.insert(std::make_pair(LI, LR));
}
}
- BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) {
+ BundleRanges createBundleRanges(RangeSet& Entering,
+ RangeSet& Internal,
+ RangeSet& Exiting) {
BundleRanges BR;
for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
@@ -1284,7 +1057,8 @@ private:
return; // Bail out if we don't have kill flags on the old register.
MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill.");
+ assert(!NewKillMI->killsRegister(reg) &&
+ "New kill instr is already a kill.");
OldKillMI->clearRegisterKills(reg, &TRI);
NewKillMI->addRegisterKilled(reg, &TRI);
}
@@ -1523,22 +1297,23 @@ private:
};
void LiveIntervals::handleMove(MachineInstr* MI) {
- SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
- indexes_->removeMachineInstrFromMaps(MI);
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
SlotIndex NewIndex = MI->isInsideBundle() ?
- indexes_->getInstructionIndex(MI) :
- indexes_->insertMachineInstrInMaps(MI);
+ Indexes->getInstructionIndex(MI) :
+ Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HMEditor HME(*this, *MRI, *TRI, NewIndex);
HME.moveAllRangesFrom(MI, OldIndex);
}
-void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) {
- SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *mri_, *tri_, NewIndex);
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
+ MachineInstr* BundleStart) {
+ SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, NewIndex);
HME.moveAllRangesInto(MI, BundleStart);
}
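
With per-physreg intervals gone, fixed interference is now expressed per
register unit. A hedged sketch of a client-side overlap check; the helper
itself is an assumption, while MCRegUnitIterator and getCachedRegUnit are the
interfaces introduced here:

    // Does VirtLI overlap any live range of PhysReg's register units?
    // Unit intervals are computed lazily, so a null cache entry simply
    // means no liveness has been recorded for that unit yet.
    static bool overlapsPhysReg(LiveIntervals &LIS,
                                const TargetRegisterInfo &TRI,
                                const LiveInterval &VirtLI, unsigned PhysReg) {
      for (MCRegUnitIterator Units(PhysReg, &TRI); Units.isValid(); ++Units)
        if (const LiveInterval *UnitLI = LIS.getCachedRegUnit(*Units))
          if (VirtLI.overlaps(*UnitLI))
            return true;
      return false;
    }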
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 60a6880..dadd02b 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -81,7 +81,6 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
void
LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- OS << "LIU " << PrintReg(RepReg, TRI);
if (empty()) {
OS << " empty\n";
return;
@@ -209,3 +208,26 @@ bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
VRI = VirtReg->advanceTo(VRI, Overlaps.start());
}
}
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = 0;
+}
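
A brief usage sketch for the new LiveIntervalUnion::Array. The sizing to
register units and the unify() call are assumptions based on the existing
LiveIntervalUnion API, not part of this hunk:

    LiveIntervalUnion::Allocator Alloc;
    LiveIntervalUnion::Array Matrix;
    // One union per register unit; init() reuses the existing allocation
    // when called again with the same size.
    Matrix.init(Alloc, TRI->getNumRegUnits());
    Matrix[Unit].unify(VirtRegLI); // record a vreg's interval in one unit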
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index dbf5ac1..cd4e690 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -60,13 +60,11 @@ public:
class Query;
private:
- const unsigned RepReg; // representative register number
unsigned Tag; // unique tag for current contents.
LiveSegments Segments; // union of virtual reg segments
public:
- LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
- {}
+ explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {}
// Iterate over all segments in the union of live virtual registers ordered
// by their starting position.
@@ -183,6 +181,28 @@ public:
Query(const Query&); // DO NOT IMPLEMENT
void operator=(const Query&); // DO NOT IMPLEMENT
};
+
+ // Array of LiveIntervalUnions.
+ class Array {
+ unsigned Size;
+ LiveIntervalUnion *LIUs;
+ public:
+ Array() : Size(0), LIUs(0) {}
+ ~Array() { clear(); }
+
+ // Initialize the array to have Size entries.
+ // Reuse an existing allocation if the size matches.
+ void init(LiveIntervalUnion::Allocator&, unsigned Size);
+
+ unsigned size() const { return Size; }
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned idx) {
+ assert(idx < Size && "idx out of bounds");
+ return LIUs[idx];
+ }
+ };
};
} // end namespace llvm
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d8ab791..9384075 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -14,10 +14,19 @@
#define DEBUG_TYPE "regalloc"
#include "LiveRangeCalc.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
-void LiveRangeCalc::reset(const MachineFunction *MF) {
+void LiveRangeCalc::reset(const MachineFunction *MF,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+
unsigned N = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(N);
@@ -26,8 +35,73 @@ void LiveRangeCalc::reset(const MachineFunction *MF) {
}
+void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LI->createDeadDef() will deduplicate.
+ for (MachineRegisterInfo::def_iterator
+ I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ // Find the corresponding slot index.
+ SlotIndex Idx;
+ if (MI->isPHI())
+ // PHI defs begin at the basic block start index.
+ Idx = Indexes->getMBBStartIdx(MI->getParent());
+ else
+ // Instructions are either normal 'r', or early clobber 'e'.
+ Idx = Indexes->getInstructionIndex(MI)
+ .getRegSlot(I.getOperand().isEarlyClobber());
+
+ // Create the def in LI. This may find an existing def.
+ VNInfo *VNI = LI->createDeadDef(Idx, *Alloc);
+ VNI->setIsPHIDef(MI->isPHI());
+ }
+}
+
+
+void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all operands that read Reg. This may include partial defs.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ++I) {
+ const MachineOperand &MO = I.getOperand();
+ if (!MO.readsReg())
+ continue;
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK; extend() is idempotent.
+ const MachineInstr *MI = &*I;
+
+ // Find the SlotIndex being read.
+ SlotIndex Idx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // PHI operands are paired: (Reg, PredMBB).
+ // Extend the live range to be live-out from PredMBB.
+ Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB());
+ } else {
+ // This is a normal instruction.
+ Idx = Indexes->getInstructionIndex(MI).getRegSlot();
+ // Check for early-clobber redefs.
+ unsigned DefIdx;
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ if (MI->getOperand(DefIdx).isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+ }
+ extend(LI, Idx, Reg);
+ }
+}
+
+
// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
+void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) {
for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
E = LiveIn.end(); I != E; ++I) {
if (!I->DomNode)
@@ -56,9 +130,7 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
void LiveRangeCalc::extend(LiveInterval *LI,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+ unsigned PhysReg) {
assert(LI && "Missing live range");
assert(Kill.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
@@ -75,34 +147,31 @@ void LiveRangeCalc::extend(LiveInterval *LI,
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree);
+ VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg);
// When there were multiple different values, we may need new PHIs.
if (!VNI)
- updateSSA(Indexes, DomTree, Alloc);
+ updateSSA();
- updateLiveIns(VNI, Indexes);
+ updateLiveIns(VNI);
}
// This function is called by a client after using the low-level API to add
// live-out and live-in blocks. The unique value optimization is not
// available; SplitEditor::transferValues handles that case directly anyway.
-void LiveRangeCalc::calculateValues(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+void LiveRangeCalc::calculateValues() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
- updateSSA(Indexes, DomTree, Alloc);
- updateLiveIns(0, Indexes);
+ updateSSA();
+ updateLiveIns(0);
}
VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree) {
+ unsigned PhysReg) {
// Blocks where LI should be live-in.
SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
@@ -113,7 +182,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
MachineBasicBlock *MBB = WorkList[i];
- assert(!MBB->pred_empty() && "Value live-in to entry block?");
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
MachineBasicBlock *Pred = *PI;
@@ -168,9 +252,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// This is essentially the same iterative algorithm that SSAUpdater uses,
// except we already have a dominator tree, so we don't have to recompute it.
-void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+void LiveRangeCalc::updateSSA() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
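
The net effect in this file: LiveRangeCalc now captures MRI, Indexes, DomTree
and Alloc once in reset() instead of threading them through every call. A
minimal sketch of a client computing a virtual register's live range under the
new API (LIS, DomTree, MF and VirtReg are assumed to be in scope, as in any
LiveIntervals-based pass; the driver code itself is hypothetical):

    LiveRangeCalc LRC;
    // Capture the per-function state once.
    LRC.reset(MF, LIS->getSlotIndexes(), DomTree, &LIS->getVNInfoAllocator());
    LiveInterval *LI = &LIS->getOrCreateInterval(VirtReg);
    LRC.createDeadDefs(LI);  // one VNInfo per def of LI->reg
    LRC.extendToUses(LI);    // reach all uses, inserting PHI-defs as needed
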
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index b8c8585..909829b 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -34,6 +34,11 @@ template <class NodeT> class DomTreeNodeBase;
typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
class LiveRangeCalc {
+ const MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ VNInfo::Allocator *Alloc;
+
/// Seen - Bit vector of active entries in LiveOut, also used as a visited
/// set by findReachingDefs. One entry per basic block, indexed by block
/// number. This is kept as a separate bit vector because it can be cleared
@@ -100,26 +105,27 @@ class LiveRangeCalc {
/// to be live-in are added to LiveIn. If a unique reaching def is found,
/// its value is returned; if Kill is jointly dominated by multiple values,
/// NULL is returned.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
VNInfo *findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree);
+ unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
///
/// Every live-in block must be jointly dominated by the added live-out
/// blocks. No values are read from the live ranges.
- void updateSSA(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void updateSSA();
/// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
/// as a wildcard value for LiveIn entries without a value.
- void updateLiveIns(VNInfo *VNI, SlotIndexes*);
+ void updateLiveIns(VNInfo *VNI);
public:
+ LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+
//===--------------------------------------------------------------------===//
// High-level interface.
//===--------------------------------------------------------------------===//
@@ -132,14 +138,14 @@ public:
/// that may overlap a previously computed live range, and before the first
/// live range in a function. If live ranges are not known to be
/// non-overlapping, call reset before each.
- void reset(const MachineFunction *MF);
+ void reset(const MachineFunction *MF,
+ SlotIndexes*,
+ MachineDominatorTree*,
+ VNInfo::Allocator*);
/// calculate - Calculate the live range of a virtual register from its defs
/// and uses. LI must be empty with no values.
- void calculate(LiveInterval *LI,
- MachineRegisterInfo *MRI,
- SlotIndexes *Indexes,
- VNInfo::Allocator *Alloc);
+ void calculate(LiveInterval *LI);
//===--------------------------------------------------------------------===//
// Mid-level interface.
@@ -154,21 +160,30 @@ public:
/// Kill is not dominated by a single existing value, PHI-defs are inserted
/// as required to preserve SSA form. If Kill is known to be dominated by a
/// single existing value, Alloc may be null.
- void extend(LiveInterval *LI,
- SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveInterval *LI, unsigned Reg);
- /// extendToUses - Extend the live range of LI to reach all uses.
+ /// createDeadDefs - Create a dead def in LI for every def of LI->reg.
+ void createDeadDefs(LiveInterval *LI) {
+ createDeadDefs(LI, LI->reg);
+ }
+
+ /// extendToUses - Extend the live range of LI to reach all uses of Reg.
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveInterval *LI,
- MachineRegisterInfo *MRI,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void extendToUses(LiveInterval *LI, unsigned Reg);
+
+ /// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
+ void extendToUses(LiveInterval *LI) {
+ extendToUses(LI, LI->reg);
+ }
//===--------------------------------------------------------------------===//
// Low-level interface.
@@ -216,9 +231,7 @@ public:
///
/// Every predecessor of a live-in block must have been given a value with
/// setLiveOutValue, the value may be null for live-through blocks.
- void calculateValues(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void calculateValues();
};
} // end namespace llvm
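
The new PhysReg argument only feeds the NDEBUG live-in verification in
findReachingDefs; passing 0 (the default) keeps the old behavior for virtual
registers. A hedged sketch of the two entry points (the caller code is
hypothetical; getRegUnit is the LiveIntervals accessor used later in this
patch):

    // Virtual register: no live-in lists to verify.
    LRC.extend(LI, UseIdx);
    // Physical register unit: also verify MBB live-in lists along the way.
    LRC.extend(&LIS->getRegUnit(Unit), UseIdx, PhysReg);
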
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 695f536..896fdbf 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -38,7 +38,7 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.getOrCreateInterval(VReg);
- newRegs_.push_back(&LI);
+ NewRegs.push_back(&LI);
return LI;
}
@@ -46,16 +46,16 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
- scannedRemattable_ = true;
+ ScannedRemattable = true;
if (!TII.isTriviallyReMaterializable(DefMI, aa))
return false;
- remattable_.insert(VNI);
+ Remattable.insert(VNI);
return true;
}
void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
- for (LiveInterval::vni_iterator I = parent_.vni_begin(),
- E = parent_.vni_end(); I != E; ++I) {
+ for (LiveInterval::vni_iterator I = getParent().vni_begin(),
+ E = getParent().vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
@@ -64,13 +64,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
continue;
checkRematerializable(VNI, DefMI, aa);
}
- scannedRemattable_ = true;
+ ScannedRemattable = true;
}
bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
- if (!scannedRemattable_)
+ if (!ScannedRemattable)
scanRemattable(aa);
- return !remattable_.empty();
+ return !Remattable.empty();
}
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
@@ -82,12 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
- if (!MO.isReg() || !MO.getReg() || MO.isDef())
- continue;
- // Reserved registers are OK.
- if (MO.isUndef() || !LIS.hasInterval(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
+ // We can't remat physreg uses, unless the physreg is constant.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ continue;
+ return false;
+ }
+
LiveInterval &li = LIS.getInterval(MO.getReg());
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
@@ -101,10 +105,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
bool cheapAsAMove) {
- assert(scannedRemattable_ && "Call anyRematerializable first");
+ assert(ScannedRemattable && "Call anyRematerializable first");
// Use scanRemattable info.
- if (!remattable_.count(RM.ParentVNI))
+ if (!Remattable.count(RM.ParentVNI))
return false;
// No defining instruction provided.
@@ -136,13 +140,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
- rematted_.insert(RM.ParentVNI);
+ Rematted.insert(RM.ParentVNI);
return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
.getRegSlot();
}
void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
- if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
@@ -173,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (!DefMI || !UseMI)
return false;
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI,
+ LIS.getInstructionIndex(DefMI),
+ LIS.getInstructionIndex(UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ return false;
+
DEBUG(dbgs() << "Try to fold single def: " << *DefMI
<< " into single use: " << *UseMI);
@@ -220,6 +237,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<unsigned, 8> RegsToErase;
+
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
@@ -242,22 +262,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Remove defined value.
if (MOI->isDef()) {
if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
- if (delegate_)
- delegate_->LRE_WillShrinkVirtReg(LI.reg);
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
LI.removeValNo(VNI);
- if (LI.empty()) {
- ToShrink.remove(&LI);
- eraseVirtReg(Reg);
- }
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
}
}
}
- if (delegate_)
- delegate_->LRE_WillEraseInstruction(MI);
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
++NumDCEDeleted;
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ unsigned Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
}
if (ToShrink.empty())
@@ -268,8 +296,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
- if (delegate_)
- delegate_->LRE_WillShrinkVirtReg(LI->reg);
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
@@ -304,10 +332,14 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// interval must contain all the split products, and LI doesn't.
if (IsOriginal)
VRM->setIsSplitFromReg(Dups.back()->reg, 0);
- if (delegate_)
- delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
ConEQ.Distribute(&Dups[0], MRI);
+ DEBUG({
+ for (unsigned i = 0; i != NumComp; ++i)
+ dbgs() << '\t' << *Dups[i] << '\n';
+ });
}
}
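
The delegate_ -> TheDelegate rename is mechanical, but the eliminateDeadDefs
change is not: empty registers are now collected in RegsToErase and erased
only after MI itself is gone, so <undef> uses keep their (empty) interval. A
sketch of the observer side, using the callback names that appear above (the
subclass is hypothetical):

    struct SpillerDelegate : LiveRangeEdit::Delegate {
      bool LRE_CanEraseVirtReg(unsigned Reg) { return true; }
      void LRE_WillShrinkVirtReg(unsigned Reg) { /* drop cached data */ }
      void LRE_WillEraseInstruction(MachineInstr *MI) { /* unlink MI */ }
    };
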
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 0000000..cdb1776
--- /dev/null
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,152 @@
+//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRegMatrix.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
+ UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+ Queries[i].clear();
+ }
+}
+
+void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].unify(VirtReg);
+ }
+ ++NumAssigned;
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].extract(VirtReg);
+ }
+ ++NumUnassigned;
+ DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg;
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine-grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (VirtReg.overlaps(LIS->getRegUnit(*Units)))
+ return true;
+ return false;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+ unsigned RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (query(VirtReg, *Units).checkInterference())
+ return IK_VirtReg;
+
+ return IK_Free;
+}
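
checkInterference runs its three tests from cheapest to most expensive, so an
allocator can use it as a first-fit filter. A hedged sketch of such a driver
loop (Order and VirtReg are stand-ins for the client's allocation order and
the live interval being assigned):

    LiveRegMatrix &LRM = getAnalysis<LiveRegMatrix>();
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      unsigned PhysReg = Order[i];
      if (LRM.checkInterference(VirtReg, PhysReg) == LiveRegMatrix::IK_Free) {
        LRM.assign(VirtReg, PhysReg);  // updates VirtRegMap and the unions
        break;
      }
    }
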
diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h
new file mode 100644
index 0000000..b3e2d7f
--- /dev/null
+++ b/lib/CodeGen/LiveRegMatrix.h
@@ -0,0 +1,148 @@
+//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRegMatrix analysis pass keeps track of virtual register interference
+// along two dimensions: slot indexes and register units. The matrix is used by
+// register allocators to ensure that no interfering virtual registers get
+// assigned to overlapping physical registers.
+//
+// Register units are defined in MCRegisterInfo.h, they represent the smallest
+// unit of interference when dealing with overlapping physical registers. The
+// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
+// a virtual register is assigned to a physical register, the live range for
+// the virtual register is inserted into the LiveIntervalUnion for each regunit
+// in the physreg.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
+#define LLVM_CODEGEN_LIVEREGMATRIX_H
+
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervalAnalysis;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+
+class LiveRegMatrix : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ // UserTag changes whenever virtual registers have been modified.
+ unsigned UserTag;
+
+ // The matrix is represented as a LiveIntervalUnion per register unit.
+ LiveIntervalUnion::Allocator LIUAlloc;
+ LiveIntervalUnion::Array Matrix;
+
+ // Cached queries per register unit.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ // Cached register mask interference info.
+ unsigned RegMaskTag;
+ unsigned RegMaskVirtReg;
+ BitVector RegMaskUsable;
+
+ // MachineFunctionPass boilerplate.
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void releaseMemory();
+public:
+ static char ID;
+ LiveRegMatrix();
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Check for interference before assigning virtual registers to physical
+ // registers.
+ //
+
+ /// Invalidate cached interference queries after modifying virtual register
+ /// live ranges. Interference checks may return stale information unless
+ /// caches are invalidated.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ enum InterferenceKind {
+ /// No interference, go ahead and assign.
+ IK_Free = 0,
+
+ /// Virtual register interference. There are interfering virtual registers
+ /// assigned to PhysReg or its aliases. This interference could be resolved
+ /// by unassigning those other virtual registers.
+ IK_VirtReg,
+
+ /// Register unit interference. A fixed live range is in the way, typically
+ /// argument registers for a call. This can't be resolved by unassigning
+ /// other virtual registers.
+ IK_RegUnit,
+
+ /// RegMask interference. The live range is crossing an instruction with a
+ /// regmask operand that doesn't preserve PhysReg. This typically means
+ /// VirtReg is live across a call, and PhysReg isn't call-preserved.
+ IK_RegMask
+ };
+
+ /// Check for interference before assigning VirtReg to PhysReg.
+ /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
+ /// When there is more than one kind of interference, the InterferenceKind
+ /// with the highest enum value is returned.
+ InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Assign VirtReg to PhysReg.
+ /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
+ /// update VirtRegMap. The live range is expected to be available in PhysReg.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Unassign VirtReg from its PhysReg.
+ /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
+ /// the assignment and updates VirtRegMap accordingly.
+ void unassign(LiveInterval &VirtReg);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Provide access to the underlying LiveIntervalUnions.
+ //
+
+ /// Check for regmask interference only.
+ /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
+ /// If PhysReg is null, check if VirtReg crosses any regmask operands.
+ bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
+
+ /// Check for regunit interference only.
+ /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
+ /// register units.
+ bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Query a line of the assigned virtual register matrix directly.
+ /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
+ /// This returns a reference to an internal Query data structure that is only
+ /// valid until the next query() call.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
+
+ /// Directly access the live interval unions per regunit.
+ /// This returns an array indexed by the regunit number.
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEREGMATRIX_H
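
When checkInterference answers IK_VirtReg, the low-level query() interface can
name the interfering assignments, e.g. for eviction. A sketch under the same
assumptions as above (collectInterferingVRegs and interferingVRegs come from
LiveIntervalUnion.h, not this header):

    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      LiveIntervalUnion::Query &Q = LRM.query(VirtReg, *Units);
      if (Q.checkInterference()) {
        Q.collectInterferingVRegs();
        // Q.interferingVRegs() now lists candidates for unassign().
      }
    }
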
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 5a0d97d..348ed3a 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -192,8 +192,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
continue;
@@ -216,9 +216,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
PartDefRegs.insert(DefReg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- PartDefRegs.insert(SubReg);
+ for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs)
+ PartDefRegs.insert(*SubRegs);
}
}
return LastDef;
@@ -247,8 +246,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (Processed.count(SubReg))
continue;
if (PartDefRegs.count(SubReg))
@@ -259,7 +258,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
Processed.insert(*SS);
}
}
@@ -271,9 +270,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
// Remember this use.
PhysRegUse[Reg] = MI;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- PhysRegUse[SubReg] = MI;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PhysRegUse[*SubRegs] = MI;
}
/// FindLastRefOrPartRef - Return the last reference or partial reference of
@@ -287,8 +285,8 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -336,8 +334,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -351,7 +349,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
PartUses.insert(*SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
@@ -367,8 +365,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// EAX<dead> = op AL<imp-def>
// That is, EAX def is dead but AL def extends past it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (!PartUses.count(SubReg))
continue;
bool NeedDef = true;
@@ -388,11 +386,10 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
PhysRegUse[SubReg] = LastRefOrPartRef;
- for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
- unsigned SSReg = *SSRegs; ++SSRegs)
- PhysRegUse[SSReg] = LastRefOrPartRef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PhysRegUse[*SS] = LastRefOrPartRef;
}
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
PartUses.erase(*SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
@@ -434,7 +431,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
// Kill the largest clobbered super-register.
// This avoids needless implicit operands.
unsigned Super = Reg;
- for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR)
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
Super = *SR;
HandlePhysRegKill(Super, 0);
@@ -447,11 +444,11 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
Live.insert(Reg);
- for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
- Live.insert(*SS);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Live.insert(*SubRegs);
} else {
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
// If a register isn't itself defined, but all of the parts that make it
// up are defined, then consider it also defined.
// e.g.
@@ -462,7 +459,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
Live.insert(*SS);
}
}
@@ -472,8 +469,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
continue;
@@ -491,8 +488,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
PhysRegDef[SubReg] = MI;
PhysRegUse[SubReg] = NULL;
}
@@ -576,7 +573,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = MO.getReg();
if (MO.isUse()) {
MO.setIsKill(false);
- UseRegs.push_back(MOReg);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
} else /*MO.isDef()*/ {
MO.setIsDead(false);
DefRegs.push_back(MOReg);
@@ -732,8 +730,9 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
- PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
- .push_back(BBI->getOperand(i).getReg());
+ if (BBI->getOperand(i).readsReg())
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
}
bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
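
Every hunk in this file is the same mechanical rewrite: the sentinel-terminated
uint16_t sub-register arrays give way to MCSubRegIterator (and
MCSuperRegIterator for the regmask case). The before/after shape, shown once
with the names used above:

    // Old: walk a 0-terminated array, declaring SubReg in the condition.
    // for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
    //      unsigned SubReg = *SubRegs; ++SubRegs) { ... }
    // New: an explicit iterator with a validity test.
    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
      unsigned SubReg = *SubRegs;
      // ... visit SubReg ...
    }
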
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 238bf52..fbc9e20 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -314,7 +314,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// No previously defined register was in range, so create a
// new one.
int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
- const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
DEBUG(dbgs() << " Materializing base register " << BaseReg <<
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 1abb8f2..ecc1e95 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -271,11 +271,9 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
- if (Alignment) {
+ if (Alignment)
OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
<< " bytes)";
- Comma = ", ";
- }
OS << '\n';
@@ -596,6 +594,11 @@ bool MachineBasicBlock::canFallThrough() {
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isLandingPad())
+ return NULL;
+
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
@@ -670,7 +673,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Inherit live-ins from the successor
for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
- E = Succ->livein_end(); I != E; ++I)
+ E = Succ->livein_end(); I != E; ++I)
NMBB->addLiveIn(*I);
// Update LiveVariables.
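
Since SplitCriticalEdge can now give up, callers must treat NULL as "edge not
split". A hedged sketch of the calling pattern (caller code hypothetical):

    if (MachineBasicBlock *NMBB = MBB->SplitCriticalEdge(Succ, P)) {
      // Redirect the branch through the new block NMBB.
    } else {
      // Succ is a landing pad; leave the critical edge alone.
    }
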
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5ba6851..5a15f92 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -11,7 +11,7 @@
// structure and branch probability estimates.
//
// The pass strives to preserve the structure of the CFG (that is, retain
-// a topological ordering of basic blocks) in the absense of a *strong* signal
+// a topological ordering of basic blocks) in the absence of a *strong* signal
// to the contrary from probabilities. However, within the CFG structure, it
// attempts to choose an ordering which favors placing more likely sequences of
// blocks adjacent to each other.
@@ -63,17 +63,13 @@ namespace {
///
/// This is the datastructure representing a chain of consecutive blocks that
/// are profitable to layout together in order to maximize fallthrough
-/// probabilities. We also can use a block chain to represent a sequence of
-/// basic blocks which have some external (correctness) requirement for
-/// sequential layout.
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
///
-/// Eventually, the block chains will form a directed graph over the function.
-/// We provide an SCC-supporting-iterator in order to quicky build and walk the
-/// SCCs of block chains within a function.
-///
-/// The block chains also have support for calculating and caching probability
-/// information related to the chain itself versus other chains. This is used
-/// for ranking during the final layout of block chains.
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
class BlockChain {
/// \brief The sequence of blocks belonging to this chain.
///
@@ -179,10 +175,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief Allocator and owner of BlockChain structures.
///
- /// We build BlockChains lazily by merging together high probability BB
- /// sequences acording to the "Algo2" in the paper mentioned at the top of
- /// the file. To reduce malloc traffic, we allocate them using this slab-like
- /// allocator, and destroy them after the pass completes.
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
/// \brief Function wide BasicBlock to BlockChain mapping.
@@ -329,7 +326,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
// the MBPI analysis, we manually compute probabilities using the edge
// weights. This is suboptimal as it means that the somewhat subtle
// definition of edge weight semantics is encoded here as well. We should
- // improve the MBPI interface to effeciently support query patterns such as
+ // improve the MBPI interface to efficiently support query patterns such as
// this.
uint32_t BestWeight = 0;
uint32_t WeightScale = 0;
@@ -1053,7 +1050,7 @@ namespace {
///
/// A separate pass to compute interesting statistics for evaluating block
/// placement. This is separate from the actual placement pass so that they can
-/// be computed in the absense of any placement transformations or when using
+/// be computed in the absence of any placement transformations or when using
/// alternative placement strategies.
class MachineBlockPlacementStats : public MachineFunctionPass {
/// \brief A handle to the branch probability pass.
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index a63688e..9cfe9ab 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -84,7 +84,7 @@ namespace {
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) const ;
+ MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
@@ -100,8 +100,7 @@ namespace {
void ExitScope(MachineBasicBlock *MBB);
bool ProcessBlock(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
};
} // end anonymous namespace
@@ -216,11 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (MO.isDef() &&
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
- PhysRefs.insert(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
if (MO.isDef())
PhysDefs.push_back(Reg);
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- PhysRefs.insert(*Alias);
}
return !PhysRefs.empty();
@@ -437,7 +435,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
- SmallSet<unsigned,8> PhysRefs;
+ SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
@@ -480,6 +478,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -488,6 +487,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// within the register class of the new instruction.
const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -522,7 +522,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
Exps.push_back(MI);
}
@@ -537,8 +536,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
if (OpenChildren[Node])
return;
@@ -546,7 +544,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
ExitScope(Node->getBlock());
// Now traverse upwards to pop ancestors whose offspring are all done.
- while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
unsigned Left = --OpenChildren[Parent];
if (Left != 0)
break;
@@ -558,7 +556,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
SmallVector<MachineDomTreeNode*, 32> Scopes;
SmallVector<MachineDomTreeNode*, 8> WorkList;
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
CurrVN = 0;
@@ -573,7 +570,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
OpenChildren[Node] = NumChildren;
for (unsigned i = 0; i != NumChildren; ++i) {
MachineDomTreeNode *Child = Children[i];
- ParentMap[Child] = Node;
WorkList.push_back(Child);
}
} while (!WorkList.empty());
@@ -586,7 +582,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
EnterScope(MBB);
Changed |= ProcessBlock(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
- ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ ExitScopeIfDone(Node, OpenChildren);
}
return Changed;
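
Two independent simplifications land in this file: ParentMap disappears
because MachineDomTreeNode::getIDom() already records each node's parent, and
the insert-Reg-then-walk-getAliasSet() pair collapses into one alias iterator.
The iterator's third argument controls whether Reg itself is visited (sketch,
same names as the code above):

    // IncludeSelf = true visits Reg plus every register aliasing it.
    for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);
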
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 9730eaa..bac3aa2 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -62,28 +62,16 @@ void
MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
- SourceMap::iterator SI = SrcMap.find(Reg);
- if (SI != SrcMap.end()) {
- const DestList& Defs = SI->second;
- for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
- I != E; ++I) {
- unsigned MappedDef = *I;
- // Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
- AvailCopyMap.erase(*SR);
- }
- }
- }
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- SI = SrcMap.find(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SourceMap::iterator SI = SrcMap.find(*AI);
if (SI != SrcMap.end()) {
const DestList& Defs = SI->second;
for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
I != E; ++I) {
unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
if (AvailCopyMap.erase(MappedDef)) {
- for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
AvailCopyMap.erase(*SR);
}
}
@@ -188,11 +176,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
// If Src is defined by a previous copy, it cannot be eliminated.
- CI = CopyMap.find(Src);
- if (CI != CopyMap.end())
- MaybeDeadCopies.remove(CI->second);
- for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) {
- CI = CopyMap.find(*AS);
+ for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
+ CI = CopyMap.find(*AI);
if (CI != CopyMap.end())
MaybeDeadCopies.remove(CI->second);
}
@@ -211,13 +196,13 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Remember Def is defined by the copy.
// ... Make sure to clear the def maps of aliases first.
- for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) {
- CopyMap.erase(*AS);
- AvailCopyMap.erase(*AS);
+ for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
}
CopyMap[Def] = MI;
AvailCopyMap[Def] = MI;
- for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
CopyMap[*SR] = MI;
AvailCopyMap[*SR] = MI;
}
@@ -256,11 +241,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
- DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
- if (CI != CopyMap.end())
- MaybeDeadCopies.remove(CI->second);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- CI = CopyMap.find(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
if (CI != CopyMap.end())
MaybeDeadCopies.remove(CI->second);
}
@@ -296,11 +278,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
unsigned Reg = Defs[i];
// No longer defined by a copy.
- CopyMap.erase(Reg);
- AvailCopyMap.erase(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- CopyMap.erase(*AS);
- AvailCopyMap.erase(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
}
// If 'Reg' is previously source of a copy, it is no longer available for
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index d8c2f6a..d4aede8 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -60,7 +60,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
MFInfo = 0;
FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
if (Fn->hasFnAttr(Attribute::StackAlignment))
- FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
Fn->getAttributes().getFnAttributes()));
ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
@@ -84,9 +84,13 @@ MachineFunction::~MachineFunction() {
MFInfo->~MachineFunctionInfo();
Allocator.Deallocate(MFInfo);
}
- FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
- ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
-
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
if (JumpTableInfo) {
JumpTableInfo->~MachineJumpTableInfo();
Allocator.Deallocate(JumpTableInfo);
@@ -98,7 +102,7 @@ MachineFunction::~MachineFunction() {
MachineJumpTableInfo *MachineFunction::
getOrCreateJumpTableInfo(unsigned EntryKind) {
if (JumpTableInfo) return JumpTableInfo;
-
+
JumpTableInfo = new (Allocator)
MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
return JumpTableInfo;
@@ -116,12 +120,12 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBI = begin();
else
MBBI = MBB;
-
+
// Figure out the block number this should have.
unsigned BlockNo = 0;
if (MBBI != begin())
BlockNo = prior(MBBI)->getNumber()+1;
-
+
for (; MBBI != E; ++MBBI, ++BlockNo) {
if (MBBI->getNumber() != (int)BlockNo) {
// Remove use of the old number.
@@ -130,7 +134,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
"MBB number mismatch!");
MBBNumbering[MBBI->getNumber()] = 0;
}
-
+
// If BlockNo is already taken, set that block's number to -1.
if (MBBNumbering[BlockNo])
MBBNumbering[BlockNo]->setNumber(-1);
@@ -138,7 +142,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering[BlockNo] = MBBI;
MBBI->setNumber(BlockNo);
}
- }
+ }
// Okay, all the blocks are renumbered. If we have compactified the block
// numbering, shrink MBBNumbering now.
@@ -295,16 +299,16 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
// Print Frame Information
FrameInfo->print(*this, OS);
-
+
// Print JumpTable Information
if (JumpTableInfo)
JumpTableInfo->print(OS);
// Print Constant Pool
ConstantPool->print(OS);
-
+
const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
-
+
if (RegInfo && !RegInfo->livein_empty()) {
OS << "Function Live Ins: ";
for (MachineRegisterInfo::livein_iterator
@@ -324,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
-
+
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
BB->print(OS, Indexes);
@@ -411,10 +415,9 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
assert(JumpTableInfo && "No jump tables");
-
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
-
+
const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
MAI.getPrivateGlobalPrefix();
SmallString<60> Name;
@@ -691,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
const_cast<Constant*>(B), TD);
-
+
return A == B;
}
@@ -714,7 +717,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
Constants[i].Alignment = Alignment;
return i;
}
-
+
Constants.push_back(MachineConstantPoolEntry(C, Alignment));
return Constants.size()-1;
}
@@ -723,7 +726,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
-
+
// Check to see if we already have this constant.
//
// FIXME, this could be made much more efficient for large constant pools.
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 2aaa798..0102ac7 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -14,7 +14,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -28,6 +30,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
raw_ostream &OS;
const std::string Banner;
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
@@ -40,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) {
OS << "# " << Banner << ":\n";
- MF.print(OS);
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
return false;
}
};
@@ -48,6 +51,10 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
char MachineFunctionPrinterPass::ID = 0;
}
+char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
+ "Machine Function Printer", false, false)
+
namespace llvm {
/// Returns a newly-created MachineFunction Printer pass. The
/// default banner is empty.
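
With the default constructor and INITIALIZE_PASS registration, the printer can
now be constructed by the generic pass machinery; explicit scheduling still
works as before. A hedged sketch of the explicit form (PM is whatever
PassManagerBase the client drives; the banner string is arbitrary):

    PM.add(createMachineFunctionPrinterPass(dbgs(), "# After ISel"));
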
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e553a04..8dada05 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/LLVMContext.h"
@@ -33,7 +34,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
@@ -186,7 +186,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
}
/// isIdenticalTo - Return true if this operand is identical to the specified
-/// operand.
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
if (getType() != Other.getType() ||
getTargetFlags() != Other.getTargetFlags())
@@ -227,6 +228,46 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
llvm_unreachable("Invalid machine operand type");
}
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(),
+ MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress());
+ case MachineOperand::MO_RegisterMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -255,12 +296,16 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "imp-";
OS << "def";
NeedComma = true;
+ // <def,read-undef> only makes sense when getSubReg() is set.
+ // Don't clutter the output otherwise.
+ if (isUndef() && getSubReg())
+ OS << ",read-undef";
} else if (isImplicit()) {
OS << "imp-use";
NeedComma = true;
}
- if (isKill() || isDead() || isUndef() || isInternalRead()) {
+ if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) {
if (NeedComma) OS << ',';
NeedComma = false;
if (isKill()) {
@@ -271,7 +316,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "dead";
NeedComma = true;
}
- if (isUndef()) {
+ if (isUndef() && isUse()) {
if (NeedComma) OS << ',';
OS << "undef";
NeedComma = true;
@@ -656,7 +701,9 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
- assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) &&
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands()) &&
"Trying to add an operand to a machine instr that is already done!");
// All operands from OpNo have been removed from RegInfo. If the Operands
@@ -868,7 +915,8 @@ void MachineInstr::eraseFromParent() {
MBB->erase(MI);
}
}
- getParent()->erase(this);
+ // Erase the individual instruction, which may itself be inside a bundle.
+ getParent()->erase_instr(this);
}
@@ -938,9 +986,13 @@ const TargetRegisterClass*
MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getParent()->getParent() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getParent()->getParent();
+
// Most opcodes have fixed constraints in their MCInstrDesc.
if (!isInlineAsm())
- return TII->getRegClass(getDesc(), OpIdx, TRI);
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
if (!getOperand(OpIdx).isReg())
return NULL;
@@ -962,7 +1014,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
// Assume that all registers in a memory operand are pointers.
if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
- return TRI->getPointerRegClass();
+ return TRI->getPointerRegClass(MF);
return NULL;
}
@@ -1530,12 +1582,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
- for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
- unsigned AliasReg = *Alias; ++Alias)
+ for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
HasAliasLive = true;
break;
}
+ }
if (!HasAliasLive) {
OmittedAnyCallClobbers = true;
continue;
@@ -1667,7 +1721,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
- bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
@@ -1739,7 +1794,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
- bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
@@ -1758,9 +1814,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
// There exists a super-register that's marked dead.
if (RegInfo->isSuperRegister(IncomingReg, Reg))
return true;
- if (RegInfo->getSubRegisters(IncomingReg) &&
- RegInfo->getSuperRegisters(Reg) &&
- RegInfo->isSubRegister(IncomingReg, Reg))
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
DeadOps.push_back(i);
}
}
@@ -1841,52 +1895,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
// Build up a buffer of hash code components.
- //
- // FIXME: This is a total hack. We should have a hash_value overload for
- // MachineOperand, but currently that doesn't work because there are many
- // different ideas of "equality" and thus different sets of information that
- // contribute to the hash code. This one happens to want to take a specific
- // subset. And it's still not clear that this routine uses the *correct*
- // subset of information when computing the hash code. The goal is to use the
- // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
- // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
- // be very useful to factor the selection of relevant inputs out of the two
- // functions and into a common routine, but it's not clear how that can be
- // done.
SmallVector<size_t, 8> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- switch (MO.getType()) {
- default: break;
- case MachineOperand::MO_Register:
- if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue; // Skip virtual register defs.
- HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
- break;
- case MachineOperand::MO_Immediate:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
- break;
- case MachineOperand::MO_FrameIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_JumpTableIndex:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
- break;
- case MachineOperand::MO_MachineBasicBlock:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
- break;
- case MachineOperand::MO_GlobalAddress:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
- break;
- case MachineOperand::MO_BlockAddress:
- HashComponents.push_back(hash_combine(MO.getType(),
- MO.getBlockAddress()));
- break;
- case MachineOperand::MO_MCSymbol:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
- break;
- }
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
}
return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
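
The hash_value overload introduced above carries an invariant worth stating outside a comment: operands that compare isIdenticalTo must hash equally, or containers keyed through MachineInstrExpressionTrait will miss. A minimal sketch of a check that could live in a unit test (the helper name is hypothetical):

    #include "llvm/CodeGen/MachineInstr.h"
    #include <cassert>

    // Sketch: identical operands must produce identical hash codes.
    static void checkHashContract(const llvm::MachineOperand &A,
                                  const llvm::MachineOperand &B) {
      if (A.isIdenticalTo(B))
        assert(hash_value(A) == hash_value(B) &&
               "hash_value out of sync with isIdenticalTo");
    }
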
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 73489a7..b7de7bf 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -169,8 +169,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
if (!MO.isDead()) {
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (LocalDefSet.insert(SubReg))
LocalDefs.push_back(SubReg);
}
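
The loop above is one instance of a migration repeated throughout this patch: null-terminated uint16_t lists from getSubRegisters/getAliasSet/getOverlaps give way to the MC-layer iterators. A hedged before/after sketch of the idiom (Reg, TRI, and use() stand in for the surrounding code):

    // Old style: walk a null-terminated array of register numbers.
    //   for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
    //     use(*SR);

    // New style: the iterators carry their own validity test.
    for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
      use(*SR);                               // sub-registers of Reg
    for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
         AI.isValid(); ++AI)
      use(*AI);                               // Reg plus everything overlapping it
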
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 8c562cc..efec481 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -445,8 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
if (MO.isImplicit()) {
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- PhysRegClobbers.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -465,7 +465,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
// If we have already seen another instruction that defines the same
// register, then this is not safe. Two defs is indicated by setting a
// PhysRegClobbers bit.
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
if (PhysRegClobbers.test(*AS))
@@ -517,8 +517,8 @@ void MachineLICM::HoistRegionPostRA() {
for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- PhysRegDefs.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
}
SpeculationState = SpeculateUnknown;
@@ -540,8 +540,8 @@ void MachineLICM::HoistRegionPostRA() {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- TermRegs.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
}
}
@@ -1260,11 +1260,11 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
if (NewOpc == 0) return 0;
const MCInstrDesc &MID = TII->get(NewOpc);
if (MID.getNumDefs() != 1) return 0;
- const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = MRI->createVirtualRegister(RC);
- MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
bool Success =
TII->unfoldMemoryOperand(MF, MI, Reg,
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 189cb2b..9f3829e 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -9,7 +9,7 @@
//
// This file defines the MachineLoopInfo class that is used to identify natural
// loops and determine the loop depth of various nodes of the CFG. Note that
-// the loops identified may actually be several natural loops that share the
+// the loops identified may actually be several natural loops that share the
// same header node... not just a single natural loop.
//
//===----------------------------------------------------------------------===//
@@ -17,17 +17,13 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
-namespace llvm {
-#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLB);
-#undef MLB
-#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLIB);
-#undef MLIB
-}
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
char MachineLoopInfo::ID = 0;
INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
@@ -40,7 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
- LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
return false;
}
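
The TEMPLATE_INSTANTIATION macros give way to standard explicit instantiation definitions. As a generic illustration of the idiom with hypothetical names: the template's out-of-line bodies live in an -Impl header, and exactly one translation unit forces the compiler to emit them for the concrete types.

    // stack.h -- declarations only; users never see the method bodies.
    template <typename T> class Stack {
    public:
      void push(T v);
    };

    // stack_impl.h -- out-of-line definitions.
    template <typename T> void Stack<T>::push(T v) { /* ... */ }

    // stack.cpp -- pull in the bodies and instantiate exactly once.
    #include "stack_impl.h"
    template class Stack<int>;   // explicit instantiation definition
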
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7ea1517..82e1235 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -162,9 +162,22 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
def_iterator I = def_begin(Reg);
+ assert((I.atEnd() || llvm::next(I) == def_end()) &&
+ "getVRegDef assumes a single definition or no definition");
return !I.atEnd() ? &*I : 0;
}
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register, or null if there is no definition or more
+/// than one.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+ if (def_empty(Reg)) return 0;
+ def_iterator I = def_begin(Reg);
+ if (llvm::next(I) != def_end())
+ return 0;
+ return &*I;
+}
+
bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
use_iterator UI = use_begin(RegNo);
if (UI == use_end())
@@ -268,15 +281,15 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
// Check if any overlapping register is modified.
- for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
- if (!def_empty(*R))
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (!def_empty(*AI))
return false;
// Check if any overlapping register is allocatable so it may be used later.
if (AllocatableRegs.empty())
AllocatableRegs = TRI->getAllocatableSet(MF);
- for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
- if (AllocatableRegs.test(*R))
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (AllocatableRegs.test(*AI))
return false;
return true;
}
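
A typical use of the new getUniqueVRegDef, as a hedged sketch (the pass context and tryFoldImmediate are hypothetical): unlike getVRegDef, which now asserts on multiple definitions, it degrades gracefully to null, so it is safe on code that is not strictly single-definition.

    // Only transform when Reg provably has exactly one definition.
    if (llvm::MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg)) {
      if (DefMI->isMoveImmediate())
        tryFoldImmediate(DefMI, UseMI);   // hypothetical helper
    }
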
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 070a557..acb1ee6 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -241,30 +241,6 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
I->second = NewReg;
}
-/// MachinePHIiter - Iterator for PHI operands. This is used for the
-/// PHI_iterator in the SSAUpdaterImpl template.
-namespace {
- class MachinePHIiter {
- private:
- MachineInstr *PHI;
- unsigned idx;
-
- public:
- explicit MachinePHIiter(MachineInstr *P) // begin iterator
- : PHI(P), idx(1) {}
- MachinePHIiter(MachineInstr *P, bool) // end iterator
- : PHI(P), idx(PHI->getNumOperands()) {}
-
- MachinePHIiter &operator++() { idx += 2; return *this; }
- bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
- bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
- unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
- MachineBasicBlock *getIncomingBlock() {
- return PHI->getOperand(idx+1).getMBB();
- }
- };
-}
-
/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
/// template, specialized for MachineSSAUpdater.
namespace llvm {
@@ -279,7 +255,26 @@ public:
static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
- typedef MachinePHIiter PHI_iterator;
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
static inline PHI_iterator PHI_end(PhiT *PHI) {
return PHI_iterator(PHI, true);
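
The iterator above leans on the fixed operand layout of machine PHIs: operand 0 is the def, and incoming (value, block) pairs occupy the odd/even slots after it. A minimal sketch of walking that layout directly (use() is hypothetical):

    // %dst = PHI %v1, <BB#1>, %v2, <BB#2>, ...
    for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
      unsigned Reg = PHI->getOperand(i).getReg();
      llvm::MachineBasicBlock *Pred = PHI->getOperand(i + 1).getMBB();
      use(Reg, Pred);
    }
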
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 1d3241b..a1dc948 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -17,9 +17,13 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,6 +54,15 @@ static bool ViewMISchedDAGs = false;
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
+MachineSchedContext::MachineSchedContext():
+ MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
@@ -122,6 +135,29 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
@@ -139,6 +175,8 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+
// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
@@ -149,6 +187,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ RegClassInfo->runOnMachineFunction(*MF);
+
// Select the scheduler, or set the default.
MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
if (Ctor == useDefaultMachineSched) {
@@ -163,13 +203,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
// Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
Scheduler->startBlock(MBB);
// Break the block into scheduling regions [I, RegionEnd), and schedule each
- // region as soon as it is discovered. RegionEnd points the the scheduling
+ // region as soon as it is discovered. RegionEnd points to the scheduling
// boundary at the bottom of the region. The DAG does not include RegionEnd,
// but the region does (i.e. the next RegionEnd is above the previous
// RegionBegin). If the current block has no terminator then RegionEnd ==
@@ -181,6 +224,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
unsigned RemainingCount = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end()
|| TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
@@ -207,7 +251,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
Scheduler->exitRegion();
continue;
}
- DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF->getFunction()->getName()
<< ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
@@ -260,6 +305,9 @@ public:
/// be scheduled at the bottom.
virtual SUnit *pickNode(bool &IsTopNode) = 0;
+ /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
/// When all predecessor dependencies have been resolved, free this node for
/// top-down scheduling.
virtual void releaseTopNode(SUnit *SU) = 0;
@@ -279,22 +327,45 @@ namespace {
/// machine instructions while updating LiveIntervals.
class ScheduleDAGMI : public ScheduleDAGInstrs {
AliasAnalysis *AA;
+ RegisterClassInfo *RegClassInfo;
MachineSchedStrategy *SchedImpl;
+ MachineBasicBlock::iterator LiveRegionEnd;
+
+ /// Register pressure in this region computed by buildSchedGraph.
+ IntervalPressure RegPressure;
+ RegPressureTracker RPTracker;
+
+ /// List of pressure sets that exceed the target's pressure limit before
+ /// scheduling, listed in increasing set ID order. Each pressure set is paired
+ /// with its max pressure in the currently scheduled regions.
+ std::vector<PressureElement> RegionCriticalPSets;
+
/// The top of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop;
+ IntervalPressure TopPressure;
+ RegPressureTracker TopRPTracker;
/// The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentBottom;
+ IntervalPressure BotPressure;
+ RegPressureTracker BotRPTracker;
+#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled;
+#endif
public:
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
- AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
- NumInstrsScheduled(0) {}
+ AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
+ RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
+ CurrentBottom(), BotRPTracker(BotPressure) {
+#ifndef NDEBUG
+ NumInstrsScheduled = 0;
+#endif
+ }
~ScheduleDAGMI() {
delete SchedImpl;
@@ -303,22 +374,68 @@ public:
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
- /// Implement ScheduleDAGInstrs interface.
+ /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
+ /// region. This covers all instructions in a block, while schedule() may only
+ /// cover a subset.
+ void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
+ /// reorderable instructions.
void schedule();
+ /// Get current register pressure for the top scheduled instructions.
+ const IntervalPressure &getTopPressure() const { return TopPressure; }
+ const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
+
+ /// Get current register pressure for the bottom scheduled instructions.
+ const IntervalPressure &getBotPressure() const { return BotPressure; }
+ const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
+
+ /// Get register pressure for the entire scheduling region before scheduling.
+ const IntervalPressure &getRegPressure() const { return RegPressure; }
+
+ const std::vector<PressureElement> &getRegionCriticalPSets() const {
+ return RegionCriticalPSets;
+ }
+
+ /// getIssueWidth - Return the max instructions per scheduling group.
+ unsigned getIssueWidth() const {
+ return (InstrItins && InstrItins->SchedModel)
+ ? InstrItins->SchedModel->IssueWidth : 1;
+ }
+
+ /// getNumMicroOps - Return the number of issue slots required for this MI.
+ unsigned getNumMicroOps(MachineInstr *MI) const {
+ if (!InstrItins) return 1;
+ int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
+ }
+
protected:
+ void initRegPressure();
+ void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
bool checkSchedLimit();
+ void releaseRoots();
+
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
void releasePred(SUnit *SU, SDep *PredEdge);
void releasePredecessors(SUnit *SU);
+
+ void placeDebugValues();
};
} // namespace
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
@@ -345,6 +462,8 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
@@ -371,12 +490,17 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
- // Fix RegionBegin if the first instruction moves down.
+ // Advance RegionBegin if the first instruction moves down.
if (&*RegionBegin == MI)
- RegionBegin = llvm::next(RegionBegin);
+ ++RegionBegin;
+
+ // Update the instruction stream.
BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
LIS->handleMove(MI);
- // Fix RegionBegin if another instruction moves above the first instruction.
+
+ // Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
RegionBegin = MI;
}
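
The RegionBegin bookkeeping in moveInstruction mirrors list-splice semantics: splicing invalidates no iterators, but a boundary iterator can silently end up on the wrong side of the move. A toy std::list model of the two cases handled above:

    #include <iterator>
    #include <list>

    void toySplice() {
      std::list<int> bb = {1, 2, 3, 4};      // stand-in for the block
      std::list<int>::iterator regionBegin = bb.begin();       // at 1
      std::list<int>::iterator mi = std::next(bb.begin(), 2);  // at 3
      std::list<int>::iterator insertPos = regionBegin;

      if (mi == regionBegin)                 // case 1: first instr moves down
        ++regionBegin;
      bb.splice(insertPos, bb, mi);          // bb is now {3, 1, 2, 4}
      if (regionBegin == insertPos)          // case 2: mi moved above the first
        regionBegin = mi;
    }
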
@@ -392,12 +516,114 @@ bool ScheduleDAGMI::checkSchedLimit() {
return true;
}
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd =
+ (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top and bottom scheduled
+// regions.
+void ScheduleDAGMI::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.getPressure().dump(TRI));
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ BotRPTracker.recede();
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (RegionPressure[i] > Limit)
+ RegionCriticalPSets.push_back(PressureElement(i, 0));
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].PSetID) << " ";
+ dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+ unsigned ID = RegionCriticalPSets[i].PSetID;
+ int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+ if ((int)NewMaxPressure[ID] > MaxUnits)
+ MaxUnits = NewMaxPressure[ID];
+ }
+}
+
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ BotRoots.push_back(&(*I));
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region.
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
void ScheduleDAGMI::schedule() {
- buildSchedGraph(AA);
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
- DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -410,22 +636,12 @@ void ScheduleDAGMI::schedule() {
releasePredecessors(&ExitSU);
// Release all DAG roots for scheduling.
- for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
- I != E; ++I) {
- // A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
- // A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- SchedImpl->releaseBottomNode(&(*I));
- }
+ releaseRoots();
- CurrentTop = RegionBegin;
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
- DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling Instruction:\n"; SU->dump(this));
if (!checkSchedLimit())
break;
@@ -435,28 +651,69 @@ void ScheduleDAGMI::schedule() {
if (IsTopNode) {
assert(SU->isTopReady() && "node still has unscheduled dependencies");
if (&*CurrentTop == MI)
- ++CurrentTop;
- else
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
// Release dependent instructions for scheduling.
releaseSuccessors(SU);
}
else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
- if (&*llvm::prior(CurrentBottom) == MI)
- --CurrentBottom;
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
else {
- if (&*CurrentTop == MI)
- CurrentTop = llvm::next(CurrentTop);
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
moveInstruction(MI, CurrentBottom);
CurrentBottom = MI;
}
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
// Release dependent instructions for scheduling.
releasePredecessors(SU);
}
SU->isScheduled = true;
+ SchedImpl->schedNode(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == llvm::prior(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
//===----------------------------------------------------------------------===//
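
The ConvergingScheduler code in the hunk below ranks candidates by a lexicographic comparison of register-pressure deltas (see compareRPDelta further down): excess over the target limit first, then the region's critical maximum, then the overall maximum. A toy model of that ordering:

    // Lower is better at each level; a full tie falls through to node order.
    struct Delta { int Excess, CriticalMax, CurrentMax; };

    static bool lessPressure(const Delta &L, const Delta &R) {
      if (L.Excess != R.Excess)           return L.Excess < R.Excess;
      if (L.CriticalMax != R.CriticalMax) return L.CriticalMax < R.CriticalMax;
      return L.CurrentMax < R.CurrentMax;
    }
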
@@ -464,56 +721,603 @@ void ScheduleDAGMI::schedule() {
//===----------------------------------------------------------------------===//
namespace {
+/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+ unsigned ID;
+ std::string Name;
+ std::vector<SUnit*> Queue;
+
+public:
+ ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+ unsigned getID() const { return ID; }
+
+ StringRef getName() const { return Name; }
+
+ // SU is in this queue if its NodeQueueId is a superset of this ID.
+ bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+ bool empty() const { return Queue.empty(); }
+
+ unsigned size() const { return Queue.size(); }
+
+ typedef std::vector<SUnit*>::iterator iterator;
+
+ iterator begin() { return Queue.begin(); }
+
+ iterator end() { return Queue.end(); }
+
+ iterator find(SUnit *SU) {
+ return std::find(Queue.begin(), Queue.end(), SU);
+ }
+
+ void push(SUnit *SU) {
+ Queue.push_back(SU);
+ SU->NodeQueueId |= ID;
+ }
+
+ void remove(iterator I) {
+ (*I)->NodeQueueId &= ~ID;
+ *I = Queue.back();
+ Queue.pop_back();
+ }
+
+ void dump() {
+ dbgs() << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+ }
+};
+
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
+
+ /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ SchedCandidate(): SU(NULL) {}
+ };
+ /// Represent the type of SchedCandidate found within a single queue.
+ enum CandResult {
+ NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
+
+ /// Each scheduling boundary is associated with ready queues. It tracks the
+ /// current cycle in whichever direction it has moved, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ ScheduleDAGMI *DAG;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ ScheduleHazardRecognizer *HazardRec;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+ ~SchedBoundary() { delete HazardRec; }
+
+ bool isTop() const {
+ return Available.getID() == ConvergingScheduler::TopQID;
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
ScheduleDAGMI *DAG;
+ const TargetRegisterInfo *TRI;
- unsigned NumTopReady;
- unsigned NumBottomReady;
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top;
+ SchedBoundary Bot;
public:
- virtual void initialize(ScheduleDAGMI *dag) {
- DAG = dag;
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingScheduler():
+ DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(ScheduleDAGMI *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+ SUnit *pickNodeBidrectional(bool &IsTopNode);
- assert((!ForceTopDown || !ForceBottomUp) &&
- "-misched-topdown incompatible with -misched-bottomup");
+ CandResult pickNodeFromQueue(ReadyQueue &Q,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+#ifndef NDEBUG
+ void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+ PressureElement P = PressureElement());
+#endif
+};
+} // namespace
+
+void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ TRI = DAG->TRI;
+ Top.DAG = dag;
+ Bot.DAG = dag;
+
+ // Initialize the HazardRecognizers.
+ const TargetMachine &TM = DAG->MF.getTarget();
+ const InstrItineraryData *Itin = TM.getInstrItineraryData();
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + Latency)
+ SU->TopReadyCycle = PredReadyCycle + Latency;
}
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
- virtual SUnit *pickNode(bool &IsTopNode) {
- if (DAG->top() == DAG->bottom())
- return NULL;
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
- // As an initial placeholder heuristic, schedule in the direction that has
- // the fewest choices.
- SUnit *SU;
- if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
- SU = DAG->getSUnit(DAG->top());
- IsTopNode = true;
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+ SU->BotReadyCycle = SuccReadyCycle + Latency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingScheduler::SchedBoundary::bumpCycle() {
+ unsigned Width = DAG->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ }
+ else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
}
- else {
- SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
- IsTopNode = false;
+ }
+ CheckPending = true;
+
+ DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
}
- if (SU->isTopReady()) {
- assert(NumTopReady > 0 && "bad ready count");
- --NumTopReady;
+ HazardRec->EmitInstruction(SU);
+ }
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += DAG->getNumMicroOps(SU->getInstr());
+ if (IssueCount >= DAG->getIssueWidth()) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+#ifndef NDEBUG
+void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
+ SUnit *SU, PressureElement P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ SU->dump(DAG);
+}
+#endif
+
+/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
+/// more desirable than RHS from a scheduling standpoint.
+static bool compareRPDelta(const RegPressureDelta &LHS,
+ const RegPressureDelta &RHS) {
+ // Compare each component of pressure in decreasing order of importance
+ // without checking if any are valid. Invalid PressureElements are assumed to
+ // have UnitIncrease==0, so are neutral.
+
+ // Avoid exceeding the target's pressure limit.
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+ return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+ return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
+
+ // Avoid increasing the max pressure of the entire region.
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+ return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
+
+ return false;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingScheduler::CandResult ConvergingScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ // Candidate.SU remains NULL if no candidate beats the best existing one.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ // Avoid exceeding the target's limit.
+ if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) {
+ DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleExcess;
+ continue;
+ }
+ if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleExcess)
+ FoundCandidate = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (RPDelta.CriticalMax.UnitIncrease
+ < Candidate.RPDelta.CriticalMax.UnitIncrease) {
+ DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleCritical;
+ continue;
+ }
+ if (RPDelta.CriticalMax.UnitIncrease
+ > Candidate.RPDelta.CriticalMax.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleCritical)
+ FoundCandidate = MultiPressure;
+
+ // Avoid increasing the max pressure of the entire region.
+ if (RPDelta.CurrentMax.UnitIncrease
+ < Candidate.RPDelta.CurrentMax.UnitIncrease) {
+ DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleMax;
+ continue;
}
- if (SU->isBottomReady()) {
- assert(NumBottomReady > 0 && "bad ready count");
- --NumBottomReady;
+ if (RPDelta.CurrentMax.UnitIncrease
+ > Candidate.RPDelta.CurrentMax.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleMax)
+ FoundCandidate = MultiPressure;
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
+ || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ DEBUG(traceCandidate("NCAND", Q, *I));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = NodeOrder;
}
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
return SU;
}
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult = pickNodeFromQueue(Top.Available,
+ DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure, pick it.
+ if (BotResult == SingleMax) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ IsTopNode = false;
+ return BotCand.SU;
+}
- virtual void releaseTopNode(SUnit *SU) {
- ++NumTopReady;
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
}
- virtual void releaseBottomNode(SUnit *SU) {
- ++NumBottomReady;
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
}
-};
-} // namespace
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ }
+ else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+ SU->dump(DAG));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
+/// its state based on the current cycle before MachineSchedStrategy does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
@@ -592,6 +1396,8 @@ public:
return SU;
}
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+
virtual void releaseTopNode(SUnit *SU) {
TopQ.push(SU);
}
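
Alternative strategies plug into the same MachineSchedRegistry mechanism as the entries above. A hedged sketch of registering one (MyStrategy and the "my-sched" name are hypothetical; the factory signature follows createConvergingSched, and since ScheduleDAGMI is file-local here, a real registration would live in this file):

    static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
      return new ScheduleDAGMI(C, new MyStrategy());  // a MachineSchedStrategy
    }
    static MachineSchedRegistry
    MySchedRegistry("my-sched", "Example register-pressure-aware scheduler.",
                    createMySched);                   // -misched=my-sched
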
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 74ba94d..d8dece6 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -89,8 +89,8 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
- RV.push_back(*R);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RV.push_back(*SubRegs);
}
struct BBInfo {
@@ -191,9 +191,11 @@ namespace {
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBundleAfter(const MachineInstr *MI);
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
@@ -288,6 +290,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = 0;
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
@@ -295,15 +299,21 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
*OS << "Instruction: " << *MBBI;
continue;
}
- // Skip BUNDLE instruction for now. FIXME: We should add code to verify
- // the BUNDLE's specifically.
- if (MBBI->isBundle())
- continue;
+ // Is this a bundle header?
+ if (!MBBI->isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = MBBI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", MBBI);
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
visitMachineInstrAfter(MBBI);
}
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
visitMachineBasicBlockAfter(MFI);
}
visitMachineFunctionAfter();
@@ -384,10 +394,10 @@ void MachineVerifier::visitMachineFunctionBefore() {
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
Reg = regsReserved.find_next(Reg)) {
- for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
// FIXME: This should probably be:
- // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
- regsReserved.set(*Sub);
+ // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
+ regsReserved.set(*SubRegs);
}
}
@@ -466,8 +476,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().isBarrier() &&
- !TII->isPredicated(&MBB->back())) {
+ if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
+ !TII->isPredicated(getBundleStart(&MBB->back()))) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
@@ -487,10 +497,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().isBarrier()) {
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -510,10 +520,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().isBarrier()) {
+ } else if (getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -530,10 +540,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().isBarrier()) {
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -554,8 +564,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
continue;
}
regsLive.insert(*I);
- for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++)
- regsLive.insert(*R);
+ for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;
@@ -564,8 +574,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
BitVector PR = MFI->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
regsLive.insert(I);
- for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++)
- regsLive.insert(*R);
+ for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
regsKilled.clear();
@@ -575,6 +585,30 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
lastIndex = Indexes->getMBBStartIdx(MBB);
}
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+}
+
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
if (MI->getNumOperands() < MCID.getNumOperands()) {
@@ -608,17 +642,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- // Ensure non-terminators don't follow terminators.
- // Ignore predicated terminators formed by if conversion.
- // FIXME: If conversion shouldn't need to violate this rule.
- if (MI->isTerminator() && !TII->isPredicated(MI)) {
- if (!FirstTerminator)
- FirstTerminator = MI;
- } else if (FirstTerminator) {
- report("Non-terminator instruction after the first terminator", MI);
- *OS << "First terminator was:\t" << *FirstTerminator;
- }
-
StringRef ErrorInfo;
if (!TII->verifyInstruction(MI, ErrorInfo))
report(ErrorInfo.data(), MI);
@@ -634,7 +657,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MONum < MCID.getNumDefs()) {
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
- else if (!MO->isDef())
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
report("Explicit definition marked as use", MO, MONum);
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
@@ -672,7 +695,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Illegal subregister index for physical register", MO, MONum);
return;
}
- if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(Reg) << " is not a "
@@ -698,7 +722,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
}
- if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(RC);
@@ -812,6 +837,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Reserved registers may be used even when 'dead'.
if (!isReserved(Reg))
report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
} else {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
// We don't know which virtual registers are live in, so only complain
@@ -841,12 +868,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Check LiveInts for a live range, but only for virtual registers.
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
!LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot();
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ if (VNI->def != DefIdx) {
report("Inconsistent valno->def", MO, MONum);
*OS << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
@@ -863,6 +891,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled); regsKilled.clear();
@@ -876,15 +911,6 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
}
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
-
- if (Indexes && Indexes->hasIndex(MI)) {
- SlotIndex idx = Indexes->getInstructionIndex(MI);
- if (!(idx > lastIndex)) {
- report("Instruction index out of order", MI);
- *OS << "Last instruction was at " << lastIndex << '\n';
- }
- lastIndex = idx;
- }
}
void
@@ -1025,7 +1051,21 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Now check liveness info if available
calcRegsRequired();
- if (MRI->isSSA() && !MF->empty()) {
+ // Check for killed virtual registers that should be live out.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ if (MInfo.regsKilled.count(*I)) {
+ report("Virtual register killed in block, but needed live out.", MFI);
+ *OS << "Virtual register " << PrintReg(*I)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
for (RegSet::iterator
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
@@ -1069,20 +1109,21 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
- for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
- LVE = LiveInts->end(); LVI != LVE; ++LVI) {
- const LiveInterval &LI = *LVI->second;
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
// Spilling and splitting may leave unused registers around. Skip them.
- if (MRI->use_empty(LI.reg))
+ if (MRI->reg_nodbg_empty(Reg))
continue;
- // Physical registers have much weirdness going on, mostly from coalescing.
- // We should probably fix it, but for now just ignore them.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
continue;
+ }
- assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg && "Invalid reg to interval mapping");
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I!=E; ++I) {
@@ -1307,15 +1348,18 @@ void MachineVerifier::verifyLiveIntervals() {
++MFI;
continue;
}
+
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
PE = MFI->pred_end(); PI != PE; ++PI) {
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
- if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
- continue;
-
+ // All predecessors must have a live-out value.
if (!PVNI) {
report("Register not marked live out of predecessor", *PI);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
@@ -1324,12 +1368,14 @@ void MachineVerifier::verifyLiveIntervals() {
continue;
}
- if (PVNI != VNI) {
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
report("Different value live out of predecessor", *PI);
*OS << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
+ << PrintReg(Reg) << ": " << LI << '\n';
}
}
if (&*MFI == EndMBB)
@@ -1357,4 +1403,3 @@ void MachineVerifier::verifyLiveIntervals() {
}
}
}
-
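
The bundle-walking hunks above replace the old "skip BUNDLE instructions"
behavior: every instruction that is not inside a bundle is treated as a
bundle header, stand-alone instructions become one-instruction bundles, and
the slot-index ordering and terminator checks move to the per-bundle hooks.
A standalone model of the loop structure, with hypothetical types:

    #include <cstdio>
    #include <vector>

    struct Instr { bool InsideBundle; };

    void verifyBlock(const std::vector<Instr> &Block) {
      const Instr *CurBundle = nullptr;
      for (const Instr &I : Block) {
        if (!I.InsideBundle) {                  // a new bundle header
          if (CurBundle)
            std::puts("bundle-after checks");   // visitMachineBundleAfter
          CurBundle = &I;
          std::puts("bundle-before checks");    // visitMachineBundleBefore
        } else if (!CurBundle) {
          std::puts("error: no bundle header"); // report(...)
        }
        std::puts("per-instruction checks");    // operands, liveness
      }
      if (CurBundle)
        std::puts("bundle-after checks");       // flush the final bundle
    }
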
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 0ed4c34..e6e23da 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -171,23 +171,30 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
return true;
}
+/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
+ DE = MRI->def_end(); DI != DE; ++DI)
+ if (!DI->isImplicitDef())
+ return false;
+ return true;
+}
+
/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
/// are implicit_def's.
static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
const MachineRegisterInfo *MRI) {
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- unsigned SrcReg = MPhi->getOperand(i).getReg();
- const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (!DefMI || !DefMI->isImplicitDef())
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
return false;
- }
return true;
}
-
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
-/// under the assuption that it needs to be lowered in a way that supports
+/// under the assumption that it needs to be lowered in a way that supports
 /// atomic execution of PHIs. This lowering method is always correct.
///
@@ -287,7 +294,8 @@ void PHIElimination::LowerAtomicPHINode(
for (int i = NumSrcs - 1; i >= 0; --i) {
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
-
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
@@ -295,14 +303,6 @@ void PHIElimination::LowerAtomicPHINode(
// path the PHI.
MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
- // If source is defined by an implicit def, there is no need to insert a
- // copy.
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI->isImplicitDef()) {
- ImpDefs.insert(DefMI);
- continue;
- }
-
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
@@ -315,12 +315,27 @@ void PHIElimination::LowerAtomicPHINode(
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
- if (!reusedIncoming && IncomingReg)
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
// Now update live variable information if we have it. Otherwise we're done
- if (!LV) continue;
+ if (SrcUndef || !LV) continue;
// We want to be able to insert a kill of the register if this PHI (aka, the
// copy we just inserted) is the last use of the source value. Live
@@ -340,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode(
// add a kill marker in this block saying that it kills the incoming value!
if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
- // register. In most cases this is the copy, however, the first
- // terminator instruction at the end of the block may also use the value.
- // In this case, we should mark *it* as being the killing block, not the
- // copy.
- MachineBasicBlock::iterator KillInst;
- MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
- if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
- KillInst = Term;
-
- // Check that no other terminators use values.
-#ifndef NDEBUG
- for (MachineBasicBlock::iterator TI = llvm::next(Term);
- TI != opBlock.end(); ++TI) {
- if (TI->isDebugValue())
- continue;
- assert(!TI->readsRegister(SrcReg) &&
- "Terminator instructions cannot use virtual registers unless"
- "they are the first terminator in a block!");
- }
-#endif
- } else if (reusedIncoming || !IncomingReg) {
- // We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = Term;
- while (KillInst != opBlock.begin()) {
- --KillInst;
- if (KillInst->isDebugValue())
- continue;
- if (KillInst->readsRegister(SrcReg))
- break;
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as the killing
+ // instruction, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
}
- } else {
- // We just inserted this copy.
- KillInst = prior(InsertPos);
}
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -412,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
bool Changed = false;
for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
- // We break edges when registers are live out from the predecessor block
- // (not considering PHI nodes). If the register is live in to this block
- // anyway, we would gain nothing from splitting.
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
// Avoid splitting backedges of loops. It would introduce small
// out-of-line blocks into the loop which is very bad for code placement.
- if (PreMBB != &MBB &&
- !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
- if (!MLI ||
- !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
- MLI->isLoopHeader(&MBB))) {
- if (PreMBB->SplitCriticalEdge(&MBB, this)) {
- Changed = true;
- ++NumCriticalEdgesSplit;
- }
- }
+ if (PreMBB == &MBB)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some reason other than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!LV.isLiveOut(Reg, *PreMBB))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !LV.isLiveIn(Reg, MBB);
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split critical edge.\n");
+ continue;
}
+ Changed = true;
+ ++NumCriticalEdgesSplit;
}
}
return Changed;
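
The rewritten SplitPHIEdges above unpacks one compound condition into an
explicit decision sequence. Ignoring the early-outs for non-critical edges
and self-loops, the heuristic condenses to the standalone predicate below
(hypothetical types; the real code queries LiveVariables and
MachineLoopInfo):

    struct Loop {
      const Loop *Parent;
      bool contains(const Loop *L) const {
        for (; L; L = L->Parent)
          if (L == this)
            return true;
        return false;
      }
    };

    // Should the critical edge Pre -> MBB be split for PHI source Reg?
    bool shouldSplit(bool LiveOutOfPre, bool LiveInToMBB,
                     const Loop *PreLoop, const Loop *CurLoop,
                     bool MBBIsLoopHeader) {
      if (MBBIsLoopHeader && PreLoop == CurLoop)
        return false;          // never split a loop backedge
      if (!LiveOutOfPre)
        return false;          // the inserted copy will be a kill anyway
      if (!LiveInToMBB)
        return true;           // splitting removes the interference
      // Live-in and live-out: only split a loop-crossing edge, and not one
      // that enters CurLoop from an outer loop.
      if (CurLoop != PreLoop)
        return PreLoop && !PreLoop->contains(CurLoop);
      return false;
    }
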
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 13d1bbc..69d6d00 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -48,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
+ cl::desc("Enable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -80,15 +83,19 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
/// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
if (Override)
- return &NoPassID;
- return ID;
+ return 0;
+ return PassID;
}
 /// Allow Pass selection to be overridden by command line options. This supports
@@ -101,13 +108,13 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
case cl::BOU_UNSET:
return TargetID;
case cl::BOU_TRUE:
- if (TargetID != &NoPassID)
+ if (TargetID)
return TargetID;
- if (StandardID == &NoPassID)
+ if (StandardID == 0)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
- return &NoPassID;
+ return 0;
}
llvm_unreachable("Invalid command line option state");
}
@@ -149,6 +156,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
if (StandardID == &DeadMachineInstructionElimID)
return applyDisable(TargetID, DisableMachineDCE);
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, !EnableEarlyIfConversion);
+
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -178,9 +188,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
"Target Pass Configuration", false, false)
char TargetPassConfig::ID = 0;
-static char NoPassIDAnchor = 0;
-char &llvm::NoPassID = NoPassIDAnchor;
-
// Pseudo Pass IDs.
char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
@@ -193,9 +200,13 @@ public:
 // that are part of a standard pass pipeline without overriding the entire
// pipeline. This mechanism allows target options to inherit a standard pass's
// user interface. For example, a target may disable a standard pass by
- // default by substituting NoPass, and the user may still enable that standard
- // pass with an explicit command line option.
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store pairs of <AnalysisID, AnalysisID>: the second pass of each pair
+ /// is inserted after every instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
};
} // namespace llvm
@@ -207,7 +218,8 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false),
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
DisableVerify(false),
EnableTailMerge(true) {
@@ -218,11 +230,22 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
initializeCodeGen(*PassRegistry::getPassRegistry());
// Substitute Pseudo Pass IDs for real ones.
- substitutePass(EarlyTailDuplicateID, TailDuplicateID);
- substitutePass(PostRAMachineLICMID, MachineLICMID);
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ // Disable early if-conversion. Targets that are ready can enable it.
+ disablePass(&EarlyIfConverterID);
// Temporarily disable experimental passes.
- substitutePass(MachineSchedulerID, NoPassID);
+ substitutePass(&MachineSchedulerID, 0);
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ AnalysisID InsertedPassID) {
+ assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
+ std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ Impl->InsertedPasses.push_back(P);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -234,7 +257,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(*(PassManagerBase*)0) {
+ : ImmutablePass(ID), PM(0) {
llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
}
@@ -244,8 +267,9 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
Opt = Val;
}
-void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
- Impl->TargetPasses[&StandardID] = &TargetID;
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ AnalysisID TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
}
AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
@@ -256,29 +280,62 @@ AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
return I->second;
}
-/// Add a CodeGen pass at this point in the pipeline after checking for target
-/// and command line overrides.
-AnalysisID TargetPassConfig::addPass(char &ID) {
+/// Add a pass to the PassManager if that pass is supposed to be run. If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass. Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
+void TargetPassConfig::addPass(Pass *P) {
assert(!Initialized && "PassConfig is immutable");
- AnalysisID TargetID = getPassSubstitution(&ID);
- AnalysisID FinalID = overridePass(&ID, TargetID);
- if (FinalID == &NoPassID)
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (Started && !Stopped)
+ PM->add(P);
+ if (StopAfter == PassID)
+ Stopped = true;
+ if (StartAfter == PassID)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+ AnalysisID TargetID = getPassSubstitution(PassID);
+ AnalysisID FinalID = overridePass(PassID, TargetID);
+ if (FinalID == 0)
return FinalID;
Pass *P = Pass::createPass(FinalID);
if (!P)
llvm_unreachable("Pass ID not registered");
- PM.add(P);
+ addPass(P);
+ // Add any passes that were registered to be inserted after pass P.
+ for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+ I != E; ++I) {
+ if ((*I).first == PassID) {
+ assert((*I).second && "Illegal Pass ID!");
+ Pass *NP = Pass::createPass((*I).second);
+ assert(NP && "Pass ID not registered");
+ addPass(NP);
+ }
+ }
return FinalID;
}
-void TargetPassConfig::printAndVerify(const char *Banner) const {
+void TargetPassConfig::printAndVerify(const char *Banner) {
if (TM->shouldPrintMachineCode())
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(Banner));
+ addPass(createMachineVerifierPass(Banner));
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -288,46 +345,73 @@ void TargetPassConfig::addIRPasses() {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
+ addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createBasicAliasAnalysisPass());
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
- PM.add(createVerifierPass());
+ addPass(createVerifierPass());
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
- PM.add(createGCLoweringPass());
+ addPass(createGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
+ addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit; the cleanups done apply to both
+ // schemes. Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
/// instruction selection.
void TargetPassConfig::addISelPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
+ addPass(createCodeGenPreparePass(getTargetLowering()));
- PM.add(createStackProtectorPass(getTargetLowering()));
+ addPass(createStackProtectorPass(getTargetLowering()));
addPreISel();
if (PrintISelInput)
- PM.add(createPrintFunctionPass("\n\n"
- "*** Final LLVM Code input to ISel ***\n",
- &dbgs()));
+ addPass(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
// All passes which modify the LLVM IR are now complete; run the verifier
// to ensure that the IR is valid.
if (!DisableVerify)
- PM.add(createVerifierPass());
+ addPass(createVerifierPass());
}
/// Add the complete set of target-independent postISel code generator passes.
@@ -349,11 +433,26 @@ void TargetPassConfig::addISelPrepare() {
/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
/// before/after any target-independent pass. But it's currently overkill.
void TargetPassConfig::addMachinePasses() {
+ // Insert a machine instr printer pass after the specified pass. If
+ // -print-machineinstrs is given with no pass name, print machine instrs
+ // after all passes.
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+ else if (!StringRef(PrintMachineInstrs.getValue())
+ .equals("option-unspecified")) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+ const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
 assert(TPI && IPI && "Pass ID not registered!");
+ const char *TID = (char *)(TPI->getTypeInfo());
+ const char *IID = (char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
+
// Print the instruction selected machine code...
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- addPass(ExpandISelPseudosID);
+ addPass(&ExpandISelPseudosID);
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -362,7 +461,7 @@ void TargetPassConfig::addMachinePasses() {
else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID);
}
// Run pre-ra passes.
@@ -381,7 +480,7 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After PostRegAlloc passes");
// Insert prolog/epilog code. Eliminate abstract frame index references...
- addPass(PrologEpilogCodeInserterID);
+ addPass(&PrologEpilogCodeInserterID);
printAndVerify("After PrologEpilogCodeInserter");
/// Add passes that optimize machine instructions after register allocation.
@@ -389,7 +488,7 @@ void TargetPassConfig::addMachinePasses() {
addMachineLateOptimization();
// Expand pseudo instructions before second scheduling pass.
- addPass(ExpandPostRAPseudosID);
+ addPass(&ExpandPostRAPseudosID);
printAndVerify("After ExpandPostRAPseudos");
// Run pre-sched2 passes.
@@ -398,14 +497,14 @@ void TargetPassConfig::addMachinePasses() {
// Second pass scheduler.
if (getOptLevel() != CodeGenOpt::None) {
- addPass(PostRASchedulerID);
+ addPass(&PostRASchedulerID);
printAndVerify("After PostRAScheduler");
}
// GC
- addPass(GCMachineCodeAnalysisID);
+ addPass(&GCMachineCodeAnalysisID);
if (PrintGCInfo)
- PM.add(createGCInfoPrinter(dbgs()));
+ addPass(createGCInfoPrinter(dbgs()));
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
@@ -418,30 +517,31 @@ void TargetPassConfig::addMachinePasses() {
/// Add passes that optimize machine instructions in SSA form.
void TargetPassConfig::addMachineSSAOptimization() {
// Pre-ra tail duplication.
- if (addPass(EarlyTailDuplicateID) != &NoPassID)
+ if (addPass(&EarlyTailDuplicateID))
printAndVerify("After Pre-RegAlloc TailDuplicate");
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
- addPass(OptimizePHIsID);
+ addPass(&OptimizePHIsID);
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID);
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- addPass(DeadMachineInstructionElimID);
+ addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen DCE pass");
- addPass(MachineLICMID);
- addPass(MachineCSEID);
- addPass(MachineSinkingID);
+ addPass(&EarlyIfConverterID);
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+ addPass(&MachineSinkingID);
printAndVerify("After Machine LICM, CSE and Sinking passes");
- addPass(PeepholeOptimizerID);
+ addPass(&PeepholeOptimizerID);
printAndVerify("After codegen peephole optimization pass");
}
@@ -519,10 +619,10 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- addPass(PHIEliminationID);
- addPass(TwoAddressInstructionPassID);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
- PM.add(RegAllocPass);
+ addPass(RegAllocPass);
printAndVerify("After Register Allocation");
}
@@ -530,42 +630,46 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&ProcessImplicitDefsID);
+
// LiveVariables currently requires pure SSA form.
//
// FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
// LiveVariables can be removed completely, and LiveIntervals can be directly
// computed. (We still either need to regenerate kill flags after regalloc, or
// preferably fix the scavenger to not depend on them).
- addPass(LiveVariablesID);
+ addPass(&LiveVariablesID);
// Add passes that move from transformed SSA into conventional SSA. This is a
// "copy coalescing" problem.
//
if (!EnableStrongPHIElim) {
// Edge splitting is smarter with machine loop info.
- addPass(MachineLoopInfoID);
- addPass(PHIEliminationID);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
}
- addPass(TwoAddressInstructionPassID);
-
- // FIXME: Either remove this pass completely, or fix it so that it works on
- // SSA form. We could modify LiveIntervals to be independent of this pass, But
- // it would be even better to simply eliminate *all* IMPLICIT_DEFs before
- // leaving SSA.
- addPass(ProcessImplicitDefsID);
+ addPass(&TwoAddressInstructionPassID);
if (EnableStrongPHIElim)
- addPass(StrongPHIEliminationID);
+ addPass(&StrongPHIEliminationID);
- addPass(RegisterCoalescerID);
+ addPass(&RegisterCoalescerID);
// PreRA instruction scheduling.
- if (addPass(MachineSchedulerID) != &NoPassID)
+ if (addPass(&MachineSchedulerID))
printAndVerify("After Machine Scheduling");
// Add the selected register allocation pass.
- PM.add(RegAllocPass);
- printAndVerify("After Register Allocation");
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation, before rewriter");
+
+ // Allow targets to change the register assignments before rewriting.
+ if (addPreRewrite())
+ printAndVerify("After pre-rewrite passes");
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ printAndVerify("After Virtual Register Rewriter");
// FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
// but eventually, all users of it should probably be moved to addPostRA and
@@ -579,12 +683,12 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
//
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
- addPass(StackSlotColoringID);
+ addPass(&StackSlotColoringID);
// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
- addPass(PostRAMachineLICMID);
+ addPass(&PostRAMachineLICMID);
printAndVerify("After StackSlotColoring and postra Machine LICM");
}
@@ -596,33 +700,33 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Branch folding must be run after regalloc and prolog/epilog insertion.
- if (addPass(BranchFolderPassID) != &NoPassID)
+ if (addPass(&BranchFolderPassID))
printAndVerify("After BranchFolding");
// Tail duplication.
- if (addPass(TailDuplicateID) != &NoPassID)
+ if (addPass(&TailDuplicateID))
printAndVerify("After TailDuplicate");
// Copy propagation.
- if (addPass(MachineCopyPropagationID) != &NoPassID)
+ if (addPass(&MachineCopyPropagationID))
printAndVerify("After copy propagation pass");
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- AnalysisID ID = &NoPassID;
+ AnalysisID PassID = 0;
if (!DisableBlockPlacement) {
// MachineBlockPlacement is a new pass which subsumes the functionality of
 // CodePlacementOpt. The old code placement pass can be restored by
// disabling block placement, but eventually it will be removed.
- ID = addPass(MachineBlockPlacementID);
+ PassID = addPass(&MachineBlockPlacementID);
} else {
- ID = addPass(CodePlacementOptID);
+ PassID = addPass(&CodePlacementOptID);
}
- if (ID != &NoPassID) {
+ if (PassID) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
- addPass(MachineBlockPlacementStatsID);
+ addPass(&MachineBlockPlacementStatsID);
printAndVerify("After machine block placement.");
}
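
The new addPass(Pass*) overload above gates every pass through the
Started/Stopped window so the StartAfter/StopAfter pass IDs can truncate
the pipeline. A runnable standalone model of the gating (hypothetical
types; IDs are compared by address, like AnalysisID):

    #include <stdexcept>
    #include <string>
    #include <vector>

    struct Pipeline {
      const char *StartAfter, *StopAfter; // pass IDs; null means unbounded
      bool Started, Stopped = false;
      std::vector<std::string> Scheduled;

      Pipeline(const char *Start, const char *Stop)
          : StartAfter(Start), StopAfter(Stop), Started(Start == nullptr) {}

      void addPass(const char *ID) {
        if (Started && !Stopped)
          Scheduled.push_back(ID);        // PM->add(P) in the real code
        if (StopAfter == ID)
          Stopped = true;
        if (StartAfter == ID)
          Started = true;
        if (Stopped && !Started)
          throw std::runtime_error(
              "cannot stop compilation after a pass that is not run");
      }
    };
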
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9c5c029..91c33c4 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -31,6 +31,15 @@
// same flag that the "cmp" instruction sets and that "bz" uses, then we can
// eliminate the "cmp" instruction.
//
+// Another instance, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use the flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
// - Optimize Bitcast pairs:
//
// v1 = bitcast v0
@@ -95,14 +104,14 @@ namespace {
}
private:
- bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
- bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
- bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
@@ -116,7 +125,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
@@ -126,7 +135,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
-OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
@@ -136,16 +145,30 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
- if (++UI == MRI->use_nodbg_end())
+ if (MRI->hasOneNonDBGUse(SrcReg))
// No other uses.
return false;
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()->
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
UI != UE; ++UI)
ReachedBBs.insert(UI->getParent());
@@ -156,8 +179,8 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallVector<MachineOperand*, 8> ExtendedUses;
bool ExtendLife = true;
- UI = MRI->use_nodbg_begin(SrcReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
@@ -169,6 +192,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
}
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
// It's an error to translate this:
//
// %reg1025 = <sext> %reg1024
@@ -223,9 +250,9 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
 // Look for PHI uses of the extended result; we don't want to extend the
// liveness of a PHI input. It breaks all kinds of assumptions down
// stream. A PHI use is expected to be the kill of its source values.
- UI = MRI->use_nodbg_begin(DstReg);
for (MachineRegisterInfo::use_nodbg_iterator
- UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
if (UI->isPHI())
PHIBBs.insert(UI->getParent());
@@ -238,14 +265,20 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
// About to add uses of DstReg, clear DstReg's kill flags.
- if (!Changed)
+ if (!Changed) {
MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
unsigned NewVR = MRI->createVirtualRegister(RC);
- BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
-
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
@@ -255,7 +288,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
-/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
 /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts
 /// a value across register classes), and the source is defined by another
 /// bitcast instruction B, and the register class of the source of B matches
@@ -265,7 +298,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
/// %vreg3<def> = VMOVRS %vreg0
/// Replace all uses of vreg3 with vreg1.
-bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
unsigned NumDefs = MI->getDesc().getNumDefs();
unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
@@ -327,22 +360,23 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
return true;
}
-/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// optimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against all ready sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
-bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
- unsigned SrcReg;
+ unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
- if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
return false;
// Attempt to optimize the comparison instruction.
- if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
++NumCmps;
return true;
}
@@ -368,10 +402,10 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
return false;
}
-/// FoldImmediate - Try folding register operands that are defined by move
+/// foldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
@@ -430,7 +464,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
if (MI->isBitcast()) {
- if (OptimizeBitcastInstr(MI, MBB)) {
+ if (optimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
@@ -438,7 +472,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
} else if (MI->isCompare()) {
- if (OptimizeCmpInstr(MI, MBB)) {
+ if (optimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
@@ -450,9 +484,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
- Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
if (SeenMoveImm)
- Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
First = false;
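
The new sub/cmp comment at the top of this file's diff describes the
classic redundant-compare pattern. At the source level it looks like the
function below: the compare recomputes exactly what the subtraction already
produced, so a flag-setting subtract ("subs") lets optimizeCmpInstr drop
the compare. Illustrative only; whether the rewrite is legal depends on the
target's condition-code semantics.

    int f(int a, int b) {
      int d = a - b;   // "sub": computes a - b and could set flags ("subs")
      if (a >= b)      // "cmp a, b; bge": recomputes a - b just for flags
        return d;
      return 0;
    }
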
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 24d3e5a..7449ff5 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,7 +22,6 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -31,6 +30,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -78,7 +78,6 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
- AliasAnalysis *AA;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
@@ -206,6 +205,10 @@ SchedulePostRATDList::SchedulePostRATDList(
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
HazardRec =
TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
AntiDepBreak =
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
@@ -423,9 +426,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
else {
@@ -437,9 +439,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
}
@@ -464,10 +465,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg);
- *Subreg; ++Subreg) {
- if (LiveRegs.test(*Subreg)) {
- MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MI->addOperand(MachineOperand::CreateReg(*SubRegs,
true /*IsDef*/,
true /*IsImp*/,
false /*IsKill*/,
@@ -517,9 +517,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
LiveRegs.reset(Reg);
// Repeat for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.reset(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
}
// Examine all used registers and set/clear kill flag. When a
@@ -536,9 +535,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- if (LiveRegs.test(*Subreg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
kill = false;
break;
}
@@ -570,9 +568,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
LiveRegs.set(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
}
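
The recurring change in this file replaces hand-written walks over the null-terminated sub-register tables returned by TRI->getSubRegisters() with the MCSubRegIterator abstraction. A minimal sketch of the two idioms side by side, using the same LiveRegs bit vector as the hunks above (the helper function is invented for illustration):

    #include "llvm/ADT/BitVector.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Mark Reg and every sub-register of Reg as live.
    static void setRegAndSubRegs(BitVector &LiveRegs, unsigned Reg,
                                 const TargetRegisterInfo *TRI) {
      LiveRegs.set(Reg);
      // Old idiom (removed above): walk a 0-terminated uint16_t table.
      //   for (const uint16_t *SubReg = TRI->getSubRegisters(Reg);
      //        *SubReg; ++SubReg)
      //     LiveRegs.set(*SubReg);
      // New idiom: the iterator hides the table encoding entirely.
      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }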
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 1ad3479..34d075c 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -9,297 +9,163 @@
#define DEBUG_TYPE "processimplicitdefs"
-#include "llvm/CodeGen/ProcessImplicitDefs.h"
-
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
using namespace llvm;
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+};
+} // end anonymous namespace
+
char ProcessImplicitDefs::ID = 0;
char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(TwoAddressInstructionPassID);
- AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool
-ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- SmallSet<unsigned, 8> &ImpDefRegs) {
- switch(OpIdx) {
- case 1:
- return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
- ImpDefRegs.count(MI->getOperand(0).getReg()));
- case 2:
- return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
- ImpDefRegs.count(MI->getOperand(0).getReg()));
- default: return false;
- }
-}
-
-static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
- SmallSet<unsigned, 8> &ImpDefRegs) {
- if (MI->isCopy()) {
- MachineOperand &MO0 = MI->getOperand(0);
- MachineOperand &MO1 = MI->getOperand(1);
- if (MO1.getReg() != Reg)
- return false;
- if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
- return true;
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
return false;
- }
- return false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse() && MO->readsReg())
+ return false;
+ return true;
}
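
In other words, a copy-like user (COPY, INSERT_SUBREG, REG_SEQUENCE, or PHI) whose register uses all read undefined values carries no information and can itself become an IMPLICIT_DEF. A schematic before/after, with made-up virtual register numbers rather than verbatim machine-instruction output:

    // Before: %vreg1 has no real definition, so the COPY reads nothing.
    //   %vreg1<def> = IMPLICIT_DEF
    //   %vreg2<def> = COPY %vreg1<undef>
    //
    // After processImplicitDef() below runs: the IMPLICIT_DEF is erased,
    // and the COPY, left with no reads, is converted and re-queued.
    //   %vreg2<def> = IMPLICIT_DEF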
-/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
-/// there is one implicit_def for each use. Add isUndef marker to
-/// implicit_def defs and their uses.
-bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
-
- DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
- << "********** Function: "
- << ((Value*)fn.getFunction())->getName() << '\n');
-
- bool Changed = false;
-
- TII = fn.getTarget().getInstrInfo();
- TRI = fn.getTarget().getRegisterInfo();
- MRI = &fn.getRegInfo();
- LV = getAnalysisIfAvailable<LiveVariables>();
-
- SmallSet<unsigned, 8> ImpDefRegs;
- SmallVector<MachineInstr*, 8> ImpDefMIs;
- SmallVector<MachineInstr*, 4> RUses;
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- SmallPtrSet<MachineInstr*, 8> ModInsts;
-
- MachineBasicBlock *Entry = fn.begin();
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isImplicitDef()) {
- ImpDefMIs.push_back(MI);
- // Is this a sub-register read-modify-write?
- if (MI->getOperand(0).readsReg())
- continue;
- unsigned Reg = MI->getOperand(0).getReg();
- ImpDefRegs.insert(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
- ImpDefRegs.insert(*SS);
- }
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &MO = UI.getOperand();
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
continue;
- }
-
- // Eliminate %reg1032:sub<def> = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).readsReg()) {
- MachineOperand &MO = MI->getOperand(1);
- if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (LV && MO.isKill()) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
- vi.removeKill(MI);
- }
- unsigned Reg = MI->getOperand(0).getReg();
- MI->eraseFromParent();
- Changed = true;
-
- // A REG_SEQUENCE may have been expanded into partial definitions.
- // If this was the last one, mark Reg as implicitly defined.
- if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
- ImpDefRegs.insert(Reg);
- continue;
- }
- }
-
- bool ChangedToImpDef = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.readsReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (!ImpDefRegs.count(Reg))
- continue;
- // Use is a copy, just turn it into an implicit_def.
- if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
- bool isKill = MO.isKill();
- MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
- if (isKill) {
- ImpDefRegs.erase(Reg);
- if (LV) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(MI);
- }
- }
- ChangedToImpDef = true;
- Changed = true;
- break;
- }
-
- Changed = true;
- MO.setIsUndef();
- // This is a partial register redef of an implicit def.
- // Make sure the whole register is defined by the instruction.
- if (MO.isDef()) {
- MI->addRegisterDefined(Reg);
- continue;
- }
- if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
- // Make sure other reads of Reg are also marked <undef>.
- for (unsigned j = i+1; j != e; ++j) {
- MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
- MOJ.setIsUndef();
- }
- ImpDefRegs.erase(Reg);
- }
- }
-
- if (ChangedToImpDef) {
- // Backtrack to process this new implicit_def.
- --I;
- } else {
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- ImpDefRegs.erase(MO.getReg());
- }
- }
+ DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
}
+ MI->eraseFromParent();
+ return;
+ }
- // Any outstanding liveout implicit_def's?
- for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
- MachineInstr *MI = ImpDefMIs[i];
- unsigned Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !ImpDefRegs.count(Reg)) {
- // Delete all "local" implicit_def's. That include those which define
- // physical registers since they cannot be liveout.
- MI->eraseFromParent();
- Changed = true;
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
continue;
- }
-
- // If there are multiple defs of the same register and at least one
- // is not an implicit_def, do not insert implicit_def's before the
- // uses.
- bool Skip = false;
- SmallVector<MachineInstr*, 4> DeadImpDefs;
- for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
- DE = MRI->def_end(); DI != DE; ++DI) {
- MachineInstr *DeadImpDef = &*DI;
- if (!DeadImpDef->isImplicitDef()) {
- Skip = true;
- break;
- }
- DeadImpDefs.push_back(DeadImpDef);
- }
- if (Skip)
+ unsigned UserReg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ !TRI->regsOverlap(Reg, UserReg))
continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO->isUse())
+ MO->setIsUndef();
+ }
+ if (Found)
+ break;
+ }
- // The only implicit_def which we want to keep are those that are live
- // out of its block.
- for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
- DeadImpDefs[j]->eraseFromParent();
- Changed = true;
-
- // Process each use instruction once.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- if (UI.getOperand().isUndef())
- continue;
- MachineInstr *RMI = &*UI;
- if (ModInsts.insert(RMI))
- RUses.push_back(RMI);
- }
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
- for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
- MachineInstr *RMI = RUses[i];
+ // The using instruction wasn't found; it could be in another block.
+ // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->RemoveOperand(i);
+ DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
- // Turn a copy use into an implicit_def.
- if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
- RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
- bool isKill = false;
- SmallVector<unsigned, 4> Ops;
- for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &RRMO = RMI->getOperand(j);
- if (RRMO.isReg() && RRMO.getReg() == Reg) {
- Ops.push_back(j);
- if (RRMO.isKill())
- isKill = true;
- }
- }
- // Leave the other operands along.
- for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
- unsigned OpIdx = Ops[j];
- RMI->RemoveOperand(OpIdx-j);
- }
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)MF.getFunction())->getName() << '\n');
- // Update LiveVariables varinfo if the instruction is a kill.
- if (LV && isKill) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(RMI);
- }
- continue;
- }
+ bool Changed = false;
- // Replace Reg with a new vreg that's marked implicit.
- const TargetRegisterClass* RC = MRI->getRegClass(Reg);
- unsigned NewVReg = MRI->createVirtualRegister(RC);
- bool isKill = true;
- for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &RRMO = RMI->getOperand(j);
- if (RRMO.isReg() && RRMO.getReg() == Reg) {
- RRMO.setReg(NewVReg);
- RRMO.setIsUndef();
- if (isKill) {
- // Only the first operand of NewVReg is marked kill.
- RRMO.setIsKill();
- isKill = false;
- }
- }
- }
- }
- RUses.clear();
- ModInsts.clear();
- }
- ImpDefRegs.clear();
- ImpDefMIs.clear();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+ if (MBBI->isImplicitDef())
+ WorkList.insert(MBBI);
+
+ if (WorkList.empty())
+ continue;
+
+ DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
}
-
return Changed;
}
-
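
The rewritten pass is a small fixed-point worklist algorithm: seed the list with every IMPLICIT_DEF in a block, then drain it, where processing one instruction may convert users into new implicit defs that need processing in turn. The control flow, restated outside the diff (SmallSetVector de-duplicates re-inserted instructions and keeps a deterministic order):

    SmallSetVector<MachineInstr*, 16> WorkList;
    // ... seed WorkList with every MI in the block where MI->isImplicitDef() ...
    while (!WorkList.empty())
      processImplicitDef(WorkList.pop_back_val());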
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 458915e..c791ffb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -302,7 +302,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
- if (! ShrinkWrapThisFunction) {
+ if (!ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index b00eceb..993dbc7 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
+#include "LiveRegMatrix.h"
#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
@@ -34,8 +35,6 @@
using namespace llvm;
-STATISTIC(NumAssigned , "Number of registers assigned");
-STATISTIC(NumUnassigned , "Number of registers unassigned");
STATISTIC(NumNewQueued , "Number of new live ranges queued");
// Temporary verification option until we can put verification inside
@@ -47,85 +46,20 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
const char *RegAllocBase::TimerGroupName = "Register Allocation";
bool RegAllocBase::VerifyEnabled = false;
-#ifndef NDEBUG
-// Verify each LiveIntervalUnion.
-void RegAllocBase::verify() {
- LiveVirtRegBitSet VisitedVRegs;
- OwningArrayPtr<LiveVirtRegBitSet>
- unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
-
- // Verify disjoint unions.
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
- LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
- PhysReg2LiveUnion[PhysReg].verify(VRegs);
- // Union + intersection test could be done efficiently in one pass, but
- // don't add a method to SparseBitVector unless we really need it.
- assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
- VisitedVRegs |= VRegs;
- }
-
- // Verify vreg coverage.
- for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
- liItr != liEnd; ++liItr) {
- unsigned reg = liItr->first;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
- if (!VRM->hasPhys(reg)) continue; // spilled?
- unsigned PhysReg = VRM->getPhys(reg);
- if (!unionVRegs[PhysReg].test(reg)) {
- dbgs() << "LiveVirtReg " << reg << " not in union " <<
- TRI->getName(PhysReg) << "\n";
- llvm_unreachable("unallocated live vreg");
- }
- }
- // FIXME: I'm not sure how to verify spilled intervals.
-}
-#endif //!NDEBUG
-
//===----------------------------------------------------------------------===//
// RegAllocBase Implementation
//===----------------------------------------------------------------------===//
-// Instantiate a LiveIntervalUnion for each physical register.
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
- unsigned NRegs) {
- NumRegs = NRegs;
- Array =
- static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
- for (unsigned r = 0; r != NRegs; ++r)
- new(Array + r) LiveIntervalUnion(r, allocator);
-}
-
-void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
- NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+void RegAllocBase::init(VirtRegMap &vrm,
+ LiveIntervals &lis,
+ LiveRegMatrix &mat) {
TRI = &vrm.getTargetRegInfo();
MRI = &vrm.getRegInfo();
VRM = &vrm;
LIS = &lis;
+ Matrix = &mat;
MRI->freezeReservedRegs(vrm.getMachineFunction());
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
-
- const unsigned NumRegs = TRI->getNumRegs();
- if (NumRegs != PhysReg2LiveUnion.numRegs()) {
- PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
- // Cache an interferece query for each physical reg
- Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
- }
-}
-
-void RegAllocBase::LiveUnionArray::clear() {
- if (!Array)
- return;
- for (unsigned r = 0; r != NumRegs; ++r)
- Array[r].~LiveIntervalUnion();
- free(Array);
- NumRegs = 0;
- Array = 0;
-}
-
-void RegAllocBase::releaseMemory() {
- for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
- PhysReg2LiveUnion[r].clear();
}
// Visit all the live registers. If they are already assigned to a physical
@@ -133,35 +67,14 @@ void RegAllocBase::releaseMemory() {
// them on the priority queue for later assignment.
void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
- unsigned RegNum = I->first;
- LiveInterval &VirtReg = *I->second;
- if (TargetRegisterInfo::isPhysicalRegister(RegNum))
- PhysReg2LiveUnion[RegNum].unify(VirtReg);
- else
- enqueue(&VirtReg);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
}
}
-void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << '\n');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
- PhysReg2LiveUnion[PhysReg].unify(VirtReg);
- ++NumAssigned;
-}
-
-void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << '\n');
- assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
- PhysReg2LiveUnion[PhysReg].extract(VirtReg);
- VRM->clearVirt(VirtReg.reg);
- ++NumUnassigned;
-}
-
// Top-level driver to manage the queue of unassigned VirtRegs and call the
// selectOrSplit implementation.
void RegAllocBase::allocatePhysRegs() {
@@ -179,14 +92,14 @@ void RegAllocBase::allocatePhysRegs() {
}
// Invalidate all interference queries, live ranges could have changed.
- invalidateVirtRegs();
+ Matrix->invalidateVirtRegs();
// selectOrSplit requests the allocator to return an available physical
// register if possible and populate a list of new live intervals that
// result from splitting.
DEBUG(dbgs() << "\nselectOrSplit "
<< MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << *VirtReg << '\n');
+ << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
typedef SmallVector<LiveInterval*, 4> VirtRegVec;
VirtRegVec SplitVRegs;
unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
@@ -211,7 +124,7 @@ void RegAllocBase::allocatePhysRegs() {
}
if (AvailablePhysReg)
- assign(*VirtReg, AvailablePhysReg);
+ Matrix->assign(*VirtReg, AvailablePhysReg);
for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
I != E; ++I) {
@@ -230,51 +143,3 @@ void RegAllocBase::allocatePhysRegs() {
}
}
}
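
Condensed, the driver loop that remains in this file now reads roughly as follows (a sketch that elides the dead-vreg and failed-allocation paths visible in the hunks above):

    while (LiveInterval *VirtReg = dequeue()) {
      // Live ranges may have changed since the last query.
      Matrix->invalidateVirtRegs();
      SmallVector<LiveInterval*, 4> SplitVRegs;
      unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
      if (AvailablePhysReg)
        Matrix->assign(*VirtReg, AvailablePhysReg);
      // New live ranges created by splitting go back on the queue.
      for (unsigned i = 0, e = SplitVRegs.size(); i != e; ++i)
        enqueue(SplitVRegs[i]);
    }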
-
-// Check if this live virtual register interferes with a physical register. If
-// not, then check for interference on each register that aliases with the
-// physical register. Return the interfering register.
-unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- if (query(VirtReg, *AliasI).checkInterference())
- return *AliasI;
- return 0;
-}
-
-// Add newly allocated physical registers to the MBB live in sets.
-void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
- NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- if (MF->size() <= 1)
- return;
-
- LiveIntervalUnion::SegmentIter SI;
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
- if (LiveUnion.empty())
- continue;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
- MachineFunction::iterator MBB = llvm::next(MF->begin());
- MachineFunction::iterator MFE = MF->end();
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.setMap(LiveUnion.getMap());
- SI.find(Start);
- while (SI.valid()) {
- if (SI.start() <= Start) {
- if (!MBB->isLiveIn(PhysReg))
- MBB->addLiveIn(PhysReg);
- DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
- << PrintReg(SI.value()->reg, TRI));
- } else if (SI.start() > Stop)
- MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
- if (++MBB == MFE)
- break;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.advanceTo(Start);
- }
- DEBUG(dbgs() << '\n');
- }
-}
-
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 072fe2b..db0c8e1 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,9 +37,9 @@
#ifndef LLVM_CODEGEN_REGALLOCBASE
#define LLVM_CODEGEN_REGALLOCBASE
-#include "llvm/ADT/OwningPtr.h"
#include "LiveIntervalUnion.h"
-#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/OwningPtr.h"
namespace llvm {
@@ -47,6 +47,7 @@ template<typename T> class SmallVectorImpl;
class TargetRegisterInfo;
class VirtRegMap;
class LiveIntervals;
+class LiveRegMatrix;
class Spiller;
/// RegAllocBase provides the register allocation driver and interface that can
@@ -56,69 +57,20 @@ class Spiller;
/// live range splitting. They must also override enqueue/dequeue to provide an
/// assignment order.
class RegAllocBase {
- LiveIntervalUnion::Allocator UnionAllocator;
-
- // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
- // registers may have changed.
- unsigned UserTag;
-
- // Array of LiveIntervalUnions indexed by physical register.
- class LiveUnionArray {
- unsigned NumRegs;
- LiveIntervalUnion *Array;
- public:
- LiveUnionArray(): NumRegs(0), Array(0) {}
- ~LiveUnionArray() { clear(); }
-
- unsigned numRegs() const { return NumRegs; }
-
- void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
-
- void clear();
-
- LiveIntervalUnion& operator[](unsigned PhysReg) {
- assert(PhysReg < NumRegs && "physReg out of bounds");
- return Array[PhysReg];
- }
- };
-
- LiveUnionArray PhysReg2LiveUnion;
-
- // Current queries, one per physreg. They must be reinitialized each time we
- // query on a new live virtual register.
- OwningArrayPtr<LiveIntervalUnion::Query> Queries;
-
protected:
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
VirtRegMap *VRM;
LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
- RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
virtual ~RegAllocBase() {}
// A RegAlloc pass should call this before allocatePhysRegs.
- void init(VirtRegMap &vrm, LiveIntervals &lis);
-
- // Get an initialized query to check interferences between lvr and preg. Note
- // that Query::init must be called at least once for each physical register
- // before querying a new live virtual register. This ties Queries and
- // PhysReg2LiveUnion together.
- LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
- Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
- return Queries[PhysReg];
- }
-
- // Get direct access to the underlying LiveIntervalUnion for PhysReg.
- LiveIntervalUnion &getLiveUnion(unsigned PhysReg) {
- return PhysReg2LiveUnion[PhysReg];
- }
-
- // Invalidate all cached information about virtual registers - live ranges may
- // have changed.
- void invalidateVirtRegs() { ++UserTag; }
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
// The top-level driver. The output is a VirtRegMap that is updated with
// physical register assignments.
@@ -140,31 +92,6 @@ protected:
virtual unsigned selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
- // A RegAlloc pass should call this when PassManager releases its memory.
- virtual void releaseMemory();
-
- // Helper for checking interference between a live virtual register and a
- // physical register, including all its register aliases. If an interference
- // exists, return the interfering register, which may be preg or an alias.
- unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
-
- /// assign - Assign VirtReg to PhysReg.
- /// This should not be called from selectOrSplit for the current register.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// unassign - Undo a previous assignment of VirtReg to PhysReg.
- /// This can be invoked from selectOrSplit, but be careful to guarantee that
- /// allocation is making progress.
- void unassign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// addMBBLiveIns - Add physreg liveins to basic blocks.
- void addMBBLiveIns(MachineFunction *);
-
-#ifndef NDEBUG
- // Verify each LiveIntervalUnion.
- void verify();
-#endif
-
// Use this group name for NamedRegionTimer.
static const char *TimerGroupName;
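
Per the class comment, a concrete allocator now owns no interference bookkeeping of its own: it provides a queue and a selectOrSplit policy, and wires the matrix in through init(). A minimal outline of such a client (a hypothetical pass, assuming the enqueue/dequeue hooks named in the comment above; pass registration and analysis usage elided):

    class RADemo : public MachineFunctionPass, public RegAllocBase {
      SmallVector<LiveInterval*, 32> Queue;   // simplistic LIFO order
      virtual void enqueue(LiveInterval *LI) { Queue.push_back(LI); }
      virtual LiveInterval *dequeue() {
        return Queue.empty() ? 0 : Queue.pop_back_val();
      }
      virtual unsigned selectOrSplit(LiveInterval &VirtReg,
                                     SmallVectorImpl<LiveInterval*> &SplitVRegs);
      virtual bool runOnMachineFunction(MachineFunction &MF) {
        RegAllocBase::init(getAnalysis<VirtRegMap>(),
                           getAnalysis<LiveIntervals>(),
                           getAnalysis<LiveRegMatrix>());
        allocatePhysRegs();
        return true;
      }
    };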
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 77ee314..3a03807 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,11 +13,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
-#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
+#include "LiveRegMatrix.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
@@ -64,10 +65,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
// context
MachineFunction *MF;
- // analyses
- LiveStacks *LS;
- RenderMachineFunction *RMF;
-
// state
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
@@ -118,9 +115,6 @@ public:
bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs);
- void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
-
static char ID;
};
@@ -139,7 +133,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -147,6 +141,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
@@ -159,41 +154,15 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
- DEBUG(AU.addRequired<RenderMachineFunction>());
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
MachineFunctionPass::getAnalysisUsage(AU);
}
void RABasic::releaseMemory() {
SpillerInstance.reset(0);
- RegAllocBase::releaseMemory();
}
-// Helper for spillInterferences() that spills all interfering vregs currently
-// assigned to this physical register.
-void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
- LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
- assert(Q.seenAllInterferences() && "need collectInterferences()");
- const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
-
- for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
- E = PendingSpills.end(); I != E; ++I) {
- LiveInterval &SpilledVReg = **I;
- DEBUG(dbgs() << "extracting from " <<
- TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
-
- // Deallocate the interfering vreg by removing it from the union.
- // A LiveInterval instance may not be in a union during modification!
- unassign(SpilledVReg, PhysReg);
-
- // Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM);
- spiller().spill(LRE);
- }
- // After extracting segments, the query's results are invalid. But keep the
- // contents valid until we're done accessing pendingSpills.
- Q.clear();
-}
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
@@ -202,22 +171,41 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
- unsigned NumInterferences = 0;
+ SmallVector<LiveInterval*, 8> Intfs;
+
// Collect interferences assigned to any alias of the physical register.
- for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
- LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
- NumInterferences += QAlias.collectInterferingVRegs();
- if (QAlias.seenUnspillableVReg()) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
return false;
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ return false;
+ Intfs.push_back(Intf);
}
}
DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
" interferences with " << VirtReg << "\n");
- assert(NumInterferences > 0 && "expect interference");
+ assert(!Intfs.empty() && "expected interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- spillReg(VirtReg, *AliasI, SplitVRegs);
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+ }
return true;
}
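
Note the two-phase discipline the new code follows (RAGreedy below does the same): collect every interfering vreg while the queries are still valid, then unassign and spill, because mutating the matrix invalidates outstanding queries and an interval may not be in a union while it is being modified. Schematically:

    SmallVector<LiveInterval*, 8> Intfs;
    // Phase 1: query each register unit while the queries are valid.
    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
      Q.collectInterferingVRegs();
      ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
      Intfs.append(IVR.begin(), IVR.end());
    }
    // Phase 2: mutate. The same vreg can appear under several regunits,
    // so skip anything that has already been unassigned.
    for (unsigned i = 0, e = Intfs.size(); i != e; ++i)
      if (VRM->hasPhys(Intfs[i]->reg))
        Matrix->unassign(*Intfs[i]);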
@@ -235,49 +223,36 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
- // Check for register mask interference. When live ranges cross calls, the
- // set of usable registers is reduced to the callee-saved ones.
- bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs);
-
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
// Check for an available register in this class.
- ArrayRef<unsigned> Order =
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
- for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
- ++I) {
- unsigned PhysReg = *I;
-
- // If PhysReg is clobbered by a register mask, it isn't useful for
- // allocation or spilling.
- if (CrossRegMasks && !UsableRegs.test(PhysReg))
- continue;
-
- // Check interference and as a side effect, intialize queries for this
- // VirtReg and its aliases.
- unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
- if (interfReg == 0) {
- // Found an available register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ while (unsigned PhysReg = Order.next()) {
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
return PhysReg;
- }
- LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg);
- IntfQ.collectInterferingVRegs(1);
- LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front();
- // The current VirtReg must either be spillable, or one of its interferences
- // must have less spill weight.
- if (interferingVirtReg->weight < VirtReg.weight ) {
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
}
}
+
// Try to spill another interfering reg with less spill weight.
for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
- PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
-
- if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ continue;
- assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
"Interference after spill.");
// Tell the caller to allocate to this newly freed physical register.
return *PhysRegI;
@@ -287,7 +262,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -301,53 +276,17 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
<< ((Value*)mf.getFunction())->getName() << '\n');
MF = &mf;
- DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
-
- RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
- addMBBLiveIns(MF);
-
// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
- // optional HTML output
- DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
-
- // FIXME: Verification currently must run before VirtRegRewriter. We should
- // make the rewriter a separate pass and override verifyAnalysis instead. When
- // that happens, verification naturally falls under VerifyMachineCode.
-#ifndef NDEBUG
- if (VerifyEnabled) {
- // Verify accuracy of LiveIntervals. The standard machine code verifier
- // ensures that each LiveIntervals covers all uses of the virtual reg.
-
- // FIXME: MachineVerifier is badly broken when using the standard
- // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
- // inline spiller, some tests fail to verify because the coalescer does not
- // always generate verifiable code.
- MF->verify(this, "In RABasic::verify");
-
- // Verify that LiveIntervals are partitioned into unions and disjoint within
- // the unions.
- verify();
- }
-#endif // !NDEBUG
-
- // Run rewriter
- VRM->rewrite(LIS->getSlotIndexes());
-
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers and release all the transient data.
- VRM->clearAllVirt();
- MRI->clearVirtRegs();
releaseMemory();
-
return true;
}
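
The net effect on RABasic is that one Matrix->checkInterference() call now classifies each candidate register, replacing the old per-alias query loop. Restated outside the diff for readability (IK_Free and IK_VirtReg as used in the hunks above):

    AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
    while (unsigned PhysReg = Order.next()) {
      switch (Matrix->checkInterference(VirtReg, PhysReg)) {
      case LiveRegMatrix::IK_Free:
        return PhysReg;                        // assign immediately
      case LiveRegMatrix::IK_VirtReg:
        PhysRegSpillCands.push_back(PhysReg);  // may evict or spill later
        continue;
      default:
        continue;        // regmask or fixed regunit interference: unusable
      }
    }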
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index e09b7f8..8325f20 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "RegisterClassInfo.h"
#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
@@ -77,7 +77,7 @@ namespace {
explicit LiveReg(unsigned v)
: LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
- unsigned getSparseSetKey() const {
+ unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
@@ -354,8 +354,8 @@ void RAFast::usePhysReg(MachineOperand &MO) {
}
// Maybe a superregister is reserved?
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
switch (PhysRegState[Alias]) {
case regDisabled:
break;
@@ -408,8 +408,8 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -456,8 +456,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
// This is a disabled register, add up cost of aliases.
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
if (UsedInInstr.test(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
@@ -659,9 +659,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
// Return true if the operand kills its register.
bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
MachineOperand &MO = MI->getOperand(OpNum);
+ bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
- return MO.isKill() || MO.isDead();
+ return MO.isKill() || Dead;
}
// Handle subregister index.
@@ -674,7 +675,13 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
MI->addRegisterKilled(PhysReg, TRI, true);
return true;
}
- return MO.isDead();
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef())
+ MI->addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
}
// Handle special instruction operand like early clobbers and tied ops when
@@ -704,13 +711,10 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- UsedInInstr.set(Reg);
- if (ThroughRegs.count(PhysRegState[Reg]))
- definePhysReg(MI, Reg, regFree);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- UsedInInstr.set(*AS);
- if (ThroughRegs.count(PhysRegState[*AS]))
- definePhysReg(MI, *AS, regFree);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ UsedInInstr.set(*AI);
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
}
}
@@ -1029,9 +1033,8 @@ void RAFast::AllocateBasicBlock() {
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
- UsedInInstr.set(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- UsedInInstr.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ UsedInInstr.set(*AI);
}
}
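
RegAllocFast gets the same iterator migration with one subtlety: MCRegAliasIterator takes a third IncludeSelf argument, so call sites that previously set a bit for Reg and then walked getAliasSet() collapse into a single loop. A sketch of both spellings (doSomethingWith is a placeholder, not an LLVM API):

    // Aliases only, Reg itself handled separately (as in usePhysReg above):
    for (MCRegAliasIterator AI(PhysReg, TRI, /*IncludeSelf=*/false);
         AI.isValid(); ++AI)
      doSomethingWith(*AI);

    // Reg and all of its aliases in one pass (as in handleThroughOperands):
    for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
         AI.isValid(); ++AI)
      UsedInInstr.set(*AI);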
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 3f2a617..6ac5428 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,6 +16,7 @@
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
+#include "LiveRegMatrix.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
@@ -73,7 +74,6 @@ class RAGreedy : public MachineFunctionPass,
// analyses
SlotIndexes *Indexes;
- LiveStacks *LS;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
EdgeBundles *Bundles;
@@ -168,19 +168,6 @@ class RAGreedy : public MachineFunctionPass,
}
};
- // Register mask interference. The current VirtReg is checked for register
- // mask interference on entry to selectOrSplit(). If there is no
- // interference, UsableRegs is left empty. If there is interference,
- // UsableRegs has a bit mask of registers that can be used without register
- // mask interference.
- BitVector UsableRegs;
-
- /// clobberedByRegMask - Returns true if PhysReg is not directly usable
- /// because of register mask clobbers.
- bool clobberedByRegMask(unsigned PhysReg) const {
- return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
- }
-
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
@@ -286,6 +273,8 @@ private:
SmallVectorImpl<LiveInterval*>&);
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
+ unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -327,6 +316,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}
@@ -336,6 +326,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
@@ -349,6 +340,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -360,8 +353,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
//===----------------------------------------------------------------------===//
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
- if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
- unassign(LIS->getInterval(VirtReg), PhysReg);
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LIS->getInterval(VirtReg));
return true;
}
// Unassigned virtreg is probably in the priority queue.
@@ -370,13 +363,12 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
}
void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
- unsigned PhysReg = VRM->getPhys(VirtReg);
- if (!PhysReg)
+ if (!VRM->hasPhys(VirtReg))
return;
// Register is assigned, put it back on the queue for reassignment.
LiveInterval &LI = LIS->getInterval(VirtReg);
- unassign(LI, PhysReg);
+ Matrix->unassign(LI);
enqueue(&LI);
}
@@ -398,7 +390,6 @@ void RAGreedy::releaseMemory() {
SpillerInstance.reset(0);
ExtraRegInfo.clear();
GlobalCand.clear();
- RegAllocBase::releaseMemory();
}
void RAGreedy::enqueue(LiveInterval *LI) {
@@ -450,12 +441,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
- while ((PhysReg = Order.next())) {
- if (clobberedByRegMask(PhysReg))
- continue;
- if (!checkPhysRegInterference(VirtReg, PhysReg))
+ while ((PhysReg = Order.next()))
+ if (!Matrix->checkInterference(VirtReg, PhysReg))
break;
- }
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
@@ -464,7 +452,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
- if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
+ if (Order.isHint(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
EvictionCost MaxCost(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -527,6 +515,10 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
bool IsHint, EvictionCost &MaxCost) {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
// Find VirtReg's cascade number. This will be unassigned if VirtReg was never
// involved in an eviction before. If a cascade number was assigned, deny
// evicting anything with the same or a newer cascade number. This prevents
@@ -539,8 +531,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
Cascade = NextCascade;
EvictionCost Cost;
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
- LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are 10 or more interferences, chances are one is heavier.
if (Q.collectInterferingVRegs(10) >= 10)
return false;
@@ -548,15 +540,21 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Check if any interfering live range is heavier than MaxWeight.
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
- return false;
+ assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ "Only expecting virtual register interference from query");
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
// urgent live ranges get to evict almost anything.
- bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent = !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
// Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
if (Cascade <= IntfCascade) {
@@ -597,19 +595,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
- LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+
+ // Collect all interfering virtregs first.
+ SmallVector<LiveInterval*, 8> Intfs;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
assert(Q.seenAllInterferences() && "Didn't check all interferences.");
- for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
- LiveInterval *Intf = Q.interferingVRegs()[i];
- unassign(*Intf, VRM->getPhys(Intf->reg));
- assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
- VirtReg.isSpillable() < Intf->isSpillable()) &&
- "Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg].Cascade = Cascade;
- ++NumEvicted;
- NewVRegs.push_back(Intf);
- }
+ ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval *Intf = Intfs[i];
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg))
+ continue;
+ Matrix->unassign(*Intf);
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
}
}
@@ -636,8 +644,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
- if (clobberedByRegMask(PhysReg))
- continue;
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1183,7 +1189,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
// Prepare split editor.
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1231,7 +1237,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1265,6 +1271,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
}
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // There is no point to this if there are no larger sub-classes.
+ if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+ return 0;
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+ // Split around every non-copy instruction.
+ for (unsigned i = 0; i != Uses.size(); ++i) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ if (MI->isFullCopy()) {
+ DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ continue;
+ }
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Assign all new registers to RS_Spill. This was the last chance.
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
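
A schematic of the intent, with made-up virtual registers rather than real machine code: the tiny per-use intervals keep the class constraint, while the connecting range degenerates to feeding copies, so a later allocation round can place it in a larger register class or spill it.

    // Before: %x must stay in a small register class across both uses.
    //   %x = ...
    //   insn1 %x
    //   insn2 %x
    //
    // After splitting around each instruction (roughly):
    //   %r = ...                      ; remainder, staged RS_Spill
    //   %a = COPY %r ; insn1 %a       ; tiny interval, keeps constraint
    //   %b = COPY %r ; insn2 %b       ; tiny interval, keeps constraint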
+
+
//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//
@@ -1291,9 +1356,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
- if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
- .checkInterference())
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+ .checkInterference())
continue;
// We know that VirtReg is a continuous interval from FirstInstr to
@@ -1303,7 +1368,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
- LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[*Units].find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1323,6 +1389,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
}
}
+
+ // Add fixed interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveInterval &LI = LIS->getRegUnit(*Units);
+ LiveInterval::const_iterator I = LI.find(StartIdx);
+ LiveInterval::const_iterator E = LI.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = HUGE_VALF;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
}
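
The second loop added above is new: fixed (physical register) interference now lives in per-regunit live intervals owned by LiveIntervals, and any gap overlapped by a fixed segment is pinned to HUGE_VALF so local splitting never places a segment there. The query pattern in isolation (StartIdx/StopIdx bound the block being analyzed):

    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      const LiveInterval &LI = LIS->getRegUnit(*Units);
      for (LiveInterval::const_iterator I = LI.find(StartIdx), E = LI.end();
           I != E && I->start < StopIdx; ++I) {
        // ... mark every gap that overlaps [I->start, I->end) as unusable ...
      }
    }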
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
@@ -1355,7 +1445,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// If VirtReg is live across any register mask operands, compute a list of
// gaps with register masks.
SmallVector<unsigned, 8> RegMaskGaps;
- if (!UsableRegs.empty()) {
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
// Get regmask slots for the whole block.
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
DEBUG(dbgs() << RMS.size() << " regmasks in block:");
@@ -1417,7 +1507,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
calcGapWeights(PhysReg, GapWeight);
// Remove any gaps with regmask clobbers.
- if (clobberedByRegMask(PhysReg))
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
GapWeight[RegMaskGaps[i]] = HUGE_VALF;
@@ -1512,7 +1602,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -1561,7 +1651,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (LIS->intervalIsInOneMBB(VirtReg)) {
NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
SA->analyze(&VirtReg);
- return tryLocalSplit(VirtReg, Order, NewVRegs);
+ unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
}
NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
@@ -1574,7 +1667,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// an assertion when the coalescer is fixed.
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
- invalidateVirtRegs();
+ Matrix->invalidateVirtRegs();
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
}
@@ -1599,11 +1692,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
- // Check if VirtReg is live across any calls.
- UsableRegs.clear();
- if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
- DEBUG(dbgs() << "Live across regmasks.\n");
-
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1644,7 +1732,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -1665,7 +1753,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
- RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
@@ -1679,30 +1769,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
- IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
- addMBBLiveIns(MF);
- LIS->addKillFlags();
-
- // Run rewriter
- {
- NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
- VRM->rewrite(Indexes);
- }
-
- // Write out new DBG_VALUE instructions.
- {
- NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled);
- DebugVars->emitDebugValues(VRM);
- }
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers and release all the transient data.
- VRM->clearAllVirt();
- MRI->clearVirtRegs();
releaseMemory();
-
return true;
}
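A minimal sketch (not part of the patch) of the interference walk the calcGapWeights() hunks above switch to: interference on PhysReg is the union of interference on its register units, with assigned virtual registers coming from the per-unit LiveIntervalUnion and fixed interference from the unit's own live interval. Only calls visible in this diff are assumed; the gap-marking bodies are elided.

    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      // Virtual registers already assigned to this register unit.
      if (Matrix->query(VirtReg, *Units).checkInterference()) {
        // ... walk Matrix->getLiveUnions()[*Units] from StartIdx and raise
        // the GapWeight[] entries that the segments overlap.
      }
      // Fixed interference is recorded on the unit's own live interval.
      const LiveInterval &LI = LIS->getRegUnit(*Units);
      if (VirtReg.overlaps(LI)) {
        // ... overlapped gaps become HUGE_VALF; no local split can avoid them.
      }
    }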
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index a284614..d0db26b 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,7 +31,6 @@
#define DEBUG_TYPE "regalloc"
-#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
#include "RegisterCoalescer.h"
@@ -98,7 +97,6 @@ public:
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
/// Return the pass name.
@@ -134,7 +132,6 @@ private:
const TargetInstrInfo *tii;
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
- RenderMachineFunction *rmf;
std::auto_ptr<Spiller> spiller;
LiveIntervals *lis;
@@ -196,7 +193,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const RegSet &vregs) {
typedef std::vector<const LiveInterval*> LIVector;
- ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots();
+ LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
MachineRegisterInfo *mri = &mf->getRegInfo();
const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
@@ -205,12 +202,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
RegSet pregs;
// Collect the set of preg intervals, record that they're used in the MF.
- for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
- if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
- pregs.insert(itr->first);
- mri->setPhysRegUsed(itr->first);
- }
+ for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
+ if (mri->def_empty(Reg))
+ continue;
+ pregs.insert(Reg);
+ mri->setPhysRegUsed(Reg);
}
BitVector reservedRegs = tri->getReservedRegs(*mf);
@@ -220,7 +216,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
vregItr != vregEnd; ++vregItr) {
unsigned vreg = *vregItr;
const TargetRegisterClass *trc = mri->getRegClass(vreg);
- const LiveInterval *vregLI = &lis->getInterval(vreg);
+ LiveInterval *vregLI = &LIS->getInterval(vreg);
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps;
+ LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
@@ -228,80 +228,26 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (!reservedRegs.test(preg)) {
- vrAllowed.push_back(preg);
- }
- }
-
- RegSet overlappingPRegs;
-
- // Record physical registers whose ranges overlap.
- for (RegSet::const_iterator pregItr = pregs.begin(),
- pregEnd = pregs.end();
- pregItr != pregEnd; ++pregItr) {
- unsigned preg = *pregItr;
- const LiveInterval *pregLI = &lis->getInterval(preg);
-
- if (pregLI->empty()) {
+ if (reservedRegs.test(preg))
continue;
- }
- if (vregLI->overlaps(*pregLI))
- overlappingPRegs.insert(preg);
- }
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
+ continue;
- // Record any overlaps with regmask operands.
- BitVector regMaskOverlaps(tri->getNumRegs());
- for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(),
- rmEnd = regMaskSlots.end();
- rmItr != rmEnd; ++rmItr) {
- SlotIndex rmIdx = *rmItr;
- if (vregLI->liveAt(rmIdx)) {
- MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx);
- const uint32_t* regMask = 0;
- for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(),
- mopEnd = rmMI->operands_end();
- mopItr != mopEnd; ++mopItr) {
- if (mopItr->isRegMask()) {
- regMask = mopItr->getRegMask();
- break;
- }
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
+ if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
+ Interference = true;
+ break;
}
- assert(regMask != 0 && "Couldn't find register mask.");
- regMaskOverlaps.setBitsNotInMask(regMask);
}
- }
+ if (Interference)
+ continue;
- for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) {
- if (regMaskOverlaps.test(preg))
- overlappingPRegs.insert(preg);
- }
-
- for (RegSet::const_iterator pregItr = overlappingPRegs.begin(),
- pregEnd = overlappingPRegs.end();
- pregItr != pregEnd; ++pregItr) {
- unsigned preg = *pregItr;
-
- // Remove the register from the allowed set.
- VRAllowed::iterator eraseItr =
- std::find(vrAllowed.begin(), vrAllowed.end(), preg);
-
- if (eraseItr != vrAllowed.end()) {
- vrAllowed.erase(eraseItr);
- }
-
- // Also remove any aliases.
- const uint16_t *aliasItr = tri->getAliasSet(preg);
- if (aliasItr != 0) {
- for (; *aliasItr != 0; ++aliasItr) {
- VRAllowed::iterator eraseItr =
- std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
-
- if (eraseItr != vrAllowed.end()) {
- vrAllowed.erase(eraseItr);
- }
- }
- }
+ // preg is usable for this virtual register.
+ vrAllowed.push_back(preg);
}
// Construct the node.
@@ -379,7 +325,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
PBQP::Graph &g = p->getGraph();
const TargetMachine &tm = mf->getTarget();
- CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+ CoalescerPair cp(*tm.getRegisterInfo());
// Scan the machine function and add a coalescing cost whenever CoalescerPair
// gives the OK.
@@ -498,21 +444,17 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
au.addRequired<VirtRegMap>();
- au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
}
void RegAllocPBQP::findVRegIntervalsToAlloc() {
// Iterate over all live ranges.
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
-
- // Ignore physical ones.
- if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (mri->reg_nodbg_empty(Reg))
continue;
-
- LiveInterval *li = itr->second;
+ LiveInterval *li = &lis->getInterval(Reg);
// If this live interval is non-empty we will use pbqp to allocate it.
// Empty intervals we allocate in a simple post-processing stage in
@@ -544,16 +486,17 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
if (problem.isPRegOption(vreg, alloc)) {
unsigned preg = problem.getPRegForOption(vreg, alloc);
- DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
+ << tri->getName(preg) << "\n");
assert(preg != 0 && "Invalid preg selected.");
vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
SmallVector<LiveInterval*, 8> newSpills;
- LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
spiller->spill(LRE);
- DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
<< LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
@@ -561,7 +504,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
assert(!(*itr)->empty() && "Empty spill range.");
- DEBUG(dbgs() << (*itr)->reg << " ");
+ DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
vregsToAlloc.insert((*itr)->reg);
}
@@ -579,9 +522,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
void RegAllocPBQP::finalizeAlloc() const {
- typedef LiveIntervals::iterator LIIterator;
- typedef LiveInterval::Ranges::const_iterator LRIterator;
-
// First allocate registers for the empty intervals.
for (RegSet::const_iterator
itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
@@ -597,51 +537,6 @@ void RegAllocPBQP::finalizeAlloc() const {
vrm->assignVirt2Phys(li->reg, physReg);
}
-
- // Finally iterate over the basic blocks to compute and set the live-in sets.
- SmallVector<MachineBasicBlock*, 8> liveInMBBs;
- MachineBasicBlock *entryMBB = &*mf->begin();
-
- for (LIIterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = liItr->second;
- unsigned reg = 0;
-
- // Get the physical register for this interval
- if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
- reg = li->reg;
- } else if (vrm->isAssignedReg(li->reg)) {
- reg = vrm->getPhys(li->reg);
- } else {
- // Ranges which are assigned a stack slot only are ignored.
- continue;
- }
-
- if (reg == 0) {
- // Filter out zero regs - they're for intervals that were spilled.
- continue;
- }
-
- // Iterate over the ranges of the current interval...
- for (LRIterator lrItr = li->begin(), lrEnd = li->end();
- lrItr != lrEnd; ++lrItr) {
-
- // Find the set of basic blocks which this range is live into...
- if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
- // And add the physreg for this interval to their live-in sets.
- for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
- if (liveInMBBs[i] != entryMBB) {
- if (!liveInMBBs[i]->isLiveIn(reg)) {
- liveInMBBs[i]->addLiveIn(reg);
- }
- }
- }
- liveInMBBs.clear();
- }
- }
- }
-
}
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
@@ -655,7 +550,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
- rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
spiller.reset(createInlineSpiller(*this, MF, *vrm));
@@ -719,22 +613,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Finalise allocation, allocate empty ranges.
finalizeAlloc();
-
- rmf->renderMachineFunction("After PBQP register allocation.", vrm);
-
vregsToAlloc.clear();
emptyIntervalVRegs.clear();
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
- // Run rewriter
- vrm->rewrite(lis->getSlotIndexes());
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers.
- vrm->clearAllVirt();
- mri->clearVirtRegs();
-
return true;
}
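The rewritten PBQPBuilder::build() above folds three rejection tests into one pass over the raw allocation order. Distilled into a helper for clarity (the name isAllowed and the free-function form are illustrative, not from the patch):

    static bool isAllowed(unsigned preg, const LiveInterval &vregLI,
                          const BitVector &reservedRegs,
                          const BitVector &regMaskOverlaps,
                          LiveIntervals &LIS, const TargetRegisterInfo *tri) {
      if (reservedRegs.test(preg))
        return false;             // never allocate reserved registers
      // An empty vector means vregLI crosses no regmask operands at all.
      if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
        return false;             // a crossed regmask clobbers preg
      for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units)
        if (vregLI.overlaps(LIS.getRegUnit(*Units)))
          return false;           // fixed interference on one of its units
      return true;                // preg is usable for this virtual register
    }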
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 17165fa..652bc30 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,9 +50,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CSRNum.clear();
CSRNum.resize(TRI->getNumRegs(), 0);
for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (const uint16_t *AS = TRI->getOverlaps(Reg);
- unsigned Alias = *AS; ++AS)
- CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
Update = true;
}
CalleeSaved = CSR;
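For reference, MCRegAliasIterator with IncludeSelf=true visits Reg itself plus every register aliasing it, i.e. exactly the set the old null-terminated getOverlaps() array enumerated. A minimal usage sketch (illustrative only):

    unsigned NumOverlaps = 0;
    for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid(); ++AI)
      ++NumOverlaps; // counts Reg and all of its aliases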
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
deleted file mode 100644
index 400e1f4..0000000
--- a/lib/CodeGen/RegisterClassInfo.h
+++ /dev/null
@@ -1,132 +0,0 @@
-//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the RegisterClassInfo class which provides dynamic
-// information about target register classes. Callee saved and reserved
-// registers depends on calling conventions and other dynamic information, so
-// some things cannot be determined statically.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H
-#define LLVM_CODEGEN_REGISTERCLASSINFO_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
-
-class RegisterClassInfo {
- struct RCInfo {
- unsigned Tag;
- unsigned NumRegs;
- bool ProperSubClass;
- OwningArrayPtr<unsigned> Order;
-
- RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
- operator ArrayRef<unsigned>() const {
- return makeArrayRef(Order.get(), NumRegs);
- }
- };
-
- // Brief cached information for each register class.
- OwningArrayPtr<RCInfo> RegClass;
-
- // Tag changes whenever cached information needs to be recomputed. An RCInfo
- // entry is valid when its tag matches.
- unsigned Tag;
-
- const MachineFunction *MF;
- const TargetRegisterInfo *TRI;
-
- // Callee saved registers of last MF. Assumed to be valid until the next
- // runOnFunction() call.
- const uint16_t *CalleeSaved;
-
- // Map register number to CalleeSaved index + 1;
- SmallVector<uint8_t, 4> CSRNum;
-
- // Reserved registers in the current MF.
- BitVector Reserved;
-
- // Compute all information about RC.
- void compute(const TargetRegisterClass *RC) const;
-
- // Return an up-to-date RCInfo for RC.
- const RCInfo &get(const TargetRegisterClass *RC) const {
- const RCInfo &RCI = RegClass[RC->getID()];
- if (Tag != RCI.Tag)
- compute(RC);
- return RCI;
- }
-
-public:
- RegisterClassInfo();
-
- /// runOnFunction - Prepare to answer questions about MF. This must be called
- /// before any other methods are used.
- void runOnMachineFunction(const MachineFunction &MF);
-
- /// getNumAllocatableRegs - Returns the number of actually allocatable
- /// registers in RC in the current function.
- unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const {
- return get(RC).NumRegs;
- }
-
- /// getOrder - Returns the preferred allocation order for RC. The order
- /// contains no reserved registers, and registers that alias callee saved
- /// registers come last.
- ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
- return get(RC);
- }
-
- /// isProperSubClass - Returns true if RC has a legal super-class with more
- /// allocatable registers.
- ///
- /// Register classes like GR32_NOSP are not proper sub-classes because %esp
- /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb
- /// mode because the GPR super-class is not legal.
- bool isProperSubClass(const TargetRegisterClass *RC) const {
- return get(RC).ProperSubClass;
- }
-
- /// getLastCalleeSavedAlias - Returns the last callee saved register that
- /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
- unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- if (unsigned N = CSRNum[PhysReg])
- return CalleeSaved[N-1];
- return 0;
- }
-
- /// isReserved - Returns true when PhysReg is a reserved register.
- ///
- /// Reserved registers may belong to an allocatable register class, but the
- /// target has explicitly requested that they are not used.
- ///
- bool isReserved(unsigned PhysReg) const {
- return Reserved.test(PhysReg);
- }
-
- /// isAllocatable - Returns true when PhysReg belongs to an allocatable
- /// register class and it hasn't been reserved.
- ///
- /// Allocatable registers may show up in the allocation order of some virtual
- /// register, so a register allocator needs to track its liveness and
- /// availability.
- bool isAllocatable(unsigned PhysReg) const {
- return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
- }
-};
-} // end namespace llvm
-
-#endif
-
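The class itself survives; only its header moved to include/llvm/CodeGen/RegisterClassInfo.h (see the #include change above), making it usable outside lib/CodeGen. A minimal consumer sketch, assuming the API from the deleted header:

    #include "llvm/CodeGen/RegisterClassInfo.h"

    RegisterClassInfo RCI;          // typically a long-lived pass member
    RCI.runOnMachineFunction(MF);   // must precede any queries
    // Preferred order: reserved registers filtered out, CSR aliases last.
    ArrayRef<unsigned> Order = RCI.getOrder(RC);
    unsigned NumAllocatable = RCI.getNumAllocatableRegs(RC);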
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 75f88ca..733312f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,34 +16,35 @@
#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
-#include "RegisterClassInfo.h"
#include "VirtRegMap.h"
#include "llvm/Pass.h"
#include "llvm/Value.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -53,8 +54,6 @@ STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
-STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
-STATISTIC(numAborts , "Number of times interval joining aborted");
STATISTIC(NumInflated , "Number of register classes inflated");
static cl::opt<bool>
@@ -63,22 +62,13 @@ EnableJoining("join-liveintervals",
cl::init(true));
static cl::opt<bool>
-DisableCrossClassJoin("disable-cross-class-join",
- cl::desc("Avoid coalescing cross register class copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-EnablePhysicalJoin("join-physregs",
- cl::desc("Join physical register copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
namespace {
- class RegisterCoalescer : public MachineFunctionPass {
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
MachineFunction* MF;
MachineRegisterInfo* MRI;
const TargetMachine* TM;
@@ -90,87 +80,83 @@ namespace {
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
- /// JoinedCopies - Keep track of copies eliminated due to coalescing.
- ///
- SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+ /// WorkList - Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+
+ /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
- /// ReMatCopies - Keep track of copies eliminated due to remat.
- ///
- SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<unsigned, 8> InflateRegs;
- /// ReMatDefs - Keep track of definition instructions which have
- /// been remat'ed.
- SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs();
- /// joinIntervals - join compatible live intervals
- void joinIntervals();
+ /// LiveRangeEdit callback.
+ void LRE_WillEraseInstruction(MachineInstr *MI);
- /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
- /// copies that cannot yet be coalesced into the "TryAgain" list.
- void CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<MachineInstr*> &TryAgain);
+ /// joinAllIntervals - join compatible live intervals
+ void joinAllIntervals();
- /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
+
+ /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
+ /// position From. Return true if any progress was made.
+ bool copyCoalesceWorkList(unsigned From = 0);
+
+ /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
/// true if the copy was successfully coalesced away. If it is not
/// currently possible to coalesce this interval, but it may be possible if
/// other things get coalesced, then it returns true by reference in
/// 'Again'.
- bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+ bool joinCopy(MachineInstr *TheCopy, bool &Again);
- /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// joinIntervals - Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we
/// can use this information below to update aliases.
- bool JoinIntervals(CoalescerPair &CP);
+ bool joinIntervals(CoalescerPair &CP);
- /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
+
+ /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
- bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
- /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// hasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
- bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
VNInfo *AValNo, VNInfo *BValNo);
- /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
- /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+ /// reMaterializeTrivialDef - If the source of a copy is defined by a
+ /// trivial computation, replace the copy by rematerializing the definition.
- /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
- unsigned DstReg, MachineInstr *CopyMI);
-
- /// shouldJoinPhys - Return true if a physreg copy should be joined.
- bool shouldJoinPhys(CoalescerPair &CP);
-
- /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
- /// two virtual registers from different register classes.
- bool isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC);
-
- /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+ MachineInstr *CopyMI);
+
+ /// canJoinPhys - Return true if a physreg copy should be joined.
+ bool canJoinPhys(CoalescerPair &CP);
+
+ /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
- void UpdateRegDefsUses(const CoalescerPair &CP);
-
- /// RemoveDeadDef - If a def of a live interval is now determined dead,
- /// remove the val# it defines. If the live interval becomes empty, remove
- /// it as well.
- bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
-
- /// markAsJoined - Remember that CopyMI has already been joined.
- void markAsJoined(MachineInstr *CopyMI);
+ void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
/// eliminateUndefCopy - Handle copies of undef values.
bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
@@ -233,7 +219,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- SrcReg = DstReg = SubIdx = 0;
+ SrcReg = DstReg = 0;
+ SrcIdx = DstIdx = 0;
NewRC = 0;
Flipped = CrossClass = false;
@@ -271,39 +258,44 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
}
} else {
// Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
// Both registers have subreg indices.
if (SrcSub && DstSub) {
- // For now we only handle the case of identical indices in commensurate
- // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
- // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
- if (SrcSub != DstSub)
+ // Copies between different sub-registers are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
return false;
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (!TRI.getCommonSubClass(DstRC, SrcRC))
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
return false;
- SrcSub = DstSub = 0;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
}
- // There can be no SrcSub.
- if (SrcSub) {
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
std::swap(Src, Dst);
- DstSub = SrcSub;
- SrcSub = 0;
- assert(!Flipped && "Unexpected flip");
- Flipped = true;
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
}
- // Find the new register class.
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (DstSub)
- NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
- else
- NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
- if (!NewRC)
- return false;
CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
@@ -312,14 +304,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
"Cannot have a physical SubIdx");
SrcReg = Src;
DstReg = Dst;
- SubIdx = DstSub;
return true;
}
bool CoalescerPair::flip() {
- if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
return false;
std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
Flipped = !Flipped;
return true;
}
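The virtual-virtual arm of setRegisters() above now distinguishes four copy shapes. A condensed sketch, with the copy each branch handles noted (annotations added here; calls as in the patch):

    if (SrcSub && DstSub) {
      // %dst:dsub = COPY %src:ssub -- find a super-class and both indices.
      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
                                         SrcIdx, DstIdx);
    } else if (DstSub) {
      // %dst:dsub = COPY %src -- SrcReg merges into a sub-register of DstReg.
      SrcIdx = DstSub;
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
    } else if (SrcSub) {
      // %dst = COPY %src:ssub -- DstReg merges into a sub-register of SrcReg.
      DstIdx = SrcSub;
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
    } else {
      // %dst = COPY %src -- a straight copy; intersect the register classes.
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
    }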
@@ -343,7 +335,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
if (!TargetRegisterInfo::isPhysicalRegister(Dst))
return false;
- assert(!SubIdx && "Inconsistent CoalescerPair state.");
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
if (DstSub)
Dst = TRI.getSubReg(Dst, DstSub);
@@ -357,7 +349,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(TRI, SubIdx, SrcSub) == DstSub;
+ return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
}
}
@@ -375,19 +367,18 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
- /// Joined copies are not deleted immediately, but kept in JoinedCopies.
- JoinedCopies.insert(CopyMI);
+void RegisterCoalescer::eliminateDeadDefs() {
+ SmallVector<LiveInterval*, 8> NewRegs;
+ LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+}
- /// Mark all register operands of CopyMI as <undef> so they won't affect dead
- /// code elimination.
- for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
- E = CopyMI->operands_end(); I != E; ++I)
- if (I->isReg())
- I->setIsUndef(true);
+// Callback from eliminateDeadDefs().
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
}
-/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a copy from B,
/// see if we can merge these two pieces of B into a single value number,
@@ -402,12 +393,10 @@ void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
///
/// This returns true if an interval was modified.
///
-bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // Bail if there is no dst interval - can happen when merging physical subreg
- // operations.
- if (!LIS->hasInterval(CP.getDstReg()))
- return false;
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
LiveInterval &IntA =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -457,24 +446,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// IntB, we can merge them.
if (ValLR+1 != BLR) return false;
- // If a live interval is a physical register, conservatively check if any
- // of its aliases is overlapping the live interval of the virtual register.
- // If so, do not coalesce.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
- if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
- DEBUG({
- dbgs() << "\t\tInterfere with alias ";
- LIS->getInterval(*AS).print(dbgs(), TRI);
- });
- return false;
- }
- }
-
- DEBUG({
- dbgs() << "Extending: ";
- IntB.print(dbgs(), TRI);
- });
+ DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -487,19 +459,6 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// two value numbers.
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
- // If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!LIS->hasInterval(*SR))
- continue;
- LiveInterval &SRLI = LIS->getInterval(*SR);
- SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart,
- LIS->getVNInfoAllocator())));
- }
- }
-
// Okay, merge "B1" into the same value number as "B0".
if (BValNo != ValLR->valno) {
// If B1 is killed by a PHI, then the merged live range must also be killed
@@ -509,11 +468,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
if (HasPHIKill)
ValLR->valno->setHasPHIKill(true);
}
- DEBUG({
- dbgs() << " result = ";
- IntB.print(dbgs(), TRI);
- dbgs() << "\n";
- });
+ DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
@@ -525,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Rewrite the copy. If the copy instruction was killing the destination
// register before the merge, find the last use and trim the live range. That
// will also add the isKill marker.
- CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(),
- *TRI);
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
if (ALR->end == CopyIdx)
LIS->shrinkToUses(&IntA);
@@ -534,12 +488,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
return true;
}
-/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// hasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
-bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
- LiveInterval &IntB,
- VNInfo *AValNo,
- VNInfo *BValNo) {
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -559,7 +513,7 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
/// commutable instruction and its other operand is coalesced to the copy dest
@@ -582,18 +536,9 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
///
/// This returns true if an interval was modified.
///
-bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // FIXME: For now, only eliminate the copy by commuting its def when the
- // source register is a virtual register. We want to guard against cases
- // where the copy is a back edge copy and commuting the def lengthen the
- // live interval of the source register to the entire loop.
- if (CP.isPhys() && CP.isFlipped())
- return false;
-
- // Bail if there is no dst interval.
- if (!LIS->hasInterval(CP.getDstReg()))
- return false;
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPhys());
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
@@ -647,17 +592,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Make sure there are no other definitions of IntB that would reach the
// uses which the new definition can reach.
- if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
return false;
- // Abort if the aliases of IntB.reg have values that are not simply the
- // clobbers from the superreg.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
- if (LIS->hasInterval(*AS) &&
- HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
- return false;
-
// If some of the uses of IntA.reg is already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
for (MachineRegisterInfo::use_nodbg_iterator UI =
@@ -666,13 +603,14 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *UseMI = &*UI;
SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end())
+ if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
- if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
return false;
}
- DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
<< *DefMI);
// At this point we have decided that it is legal to do this
@@ -709,8 +647,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
++UI;
- if (JoinedCopies.count(UseMI))
- continue;
if (UseMI->isDebugValue()) {
// FIXME These don't have an instruction index. Not clear we have enough
// info to decide whether to do this replacement or not. For now do it.
@@ -742,7 +678,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
assert(DVNI->def == DefIdx);
BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
- markAsJoined(UseMI);
+ ErasedInstrs.insert(UseMI);
+ LIS->RemoveMachineInstrFromMaps(UseMI);
+ UseMI->eraseFromParent();
}
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
@@ -762,12 +700,11 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
-/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
-bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
- bool preserveSrcInt,
- unsigned DstReg,
- MachineInstr *CopyMI) {
+bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
+ unsigned DstReg,
+ MachineInstr *CopyMI) {
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
@@ -792,7 +729,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
- const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
if (MRI->getRegClass(DstReg) != RC)
return false;
@@ -838,23 +775,21 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
- unsigned reg = NewMIImplDefs[i];
- LiveInterval &li = LIS->getInterval(reg);
- VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(),
- LIS->getVNInfoAllocator());
- LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN);
- li.addRange(lr);
+ unsigned Reg = NewMIImplDefs[i];
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
+ LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
CopyMI->eraseFromParent();
- ReMatCopies.insert(CopyMI);
- ReMatDefs.insert(DefMI);
+ ErasedInstrs.insert(CopyMI);
DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
// The source interval can become smaller because we removed a use.
- if (preserveSrcInt)
- LIS->shrinkToUses(&SrcInt);
+ LIS->shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
return true;
}
@@ -902,51 +837,40 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
return true;
}
-/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
-void
-RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
- bool DstIsPhys = CP.isPhys();
- unsigned SrcReg = CP.getSrcReg();
- unsigned DstReg = CP.getDstReg();
- unsigned SubIdx = CP.getSubIdx();
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
// Update LiveDebugVariables.
LDV->renameRegister(SrcReg, DstReg, SubIdx);
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
- // A PhysReg copy that won't be coalesced can perhaps be rematerialized
- // instead.
- if (DstIsPhys) {
- if (UseMI->isFullCopy() &&
- UseMI->getOperand(1).getReg() == SrcReg &&
- UseMI->getOperand(0).getReg() != SrcReg &&
- UseMI->getOperand(0).getReg() != DstReg &&
- !JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false,
- UseMI->getOperand(0).getReg(), UseMI))
- continue;
- }
-
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx)
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
- // Make sure we don't create read-modify-write defs accidentally. We
- // assume here that a SrcReg def cannot be joined into a live DstReg. If
- // RegisterCoalescer starts tracking partially live registers, we will
- // need to check the actual LiveInterval to determine if DstReg is live
- // here.
- if (SubIdx && !Reads)
- MO.setIsUndef();
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
if (DstIsPhys)
MO.substPhysReg(DstReg, *TRI);
@@ -954,10 +878,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
- // This instruction is a copy that will be removed.
- if (JoinedCopies.count(UseMI))
- continue;
-
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
@@ -967,210 +887,107 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
}
}
-/// removeIntervalIfEmpty - Check if the live interval of a physical register
-/// is empty, if so remove it and also remove the empty intervals of its
-/// sub-registers. Return true if live interval is removed.
-static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
- const TargetRegisterInfo *TRI) {
- if (li.empty()) {
- if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
- if (!LIS->hasInterval(*SR))
- continue;
- LiveInterval &sli = LIS->getInterval(*SR);
- if (sli.empty())
- LIS->removeInterval(*SR);
- }
- LIS->removeInterval(li.reg);
- return true;
- }
- return false;
-}
-
-/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
-/// the val# it defines. If the live interval becomes empty, remove it as well.
-bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
- MachineInstr *DefMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot();
- LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
- if (DefIdx != MLR->valno->def)
- return false;
- li.removeValNo(MLR->valno);
- return removeIntervalIfEmpty(li, LIS, TRI);
-}
-
-/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
-/// We need to be careful about coalescing a source physical register with a
-/// virtual register. Once the coalescing is done, it cannot be broken and these
-/// are not spillable! If the destination interval uses are far away, think
-/// twice about coalescing them!
-bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
- bool Allocatable = LIS->isAllocatable(CP.getDstReg());
- LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
-
+/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
- return true;
-
- if (!EnablePhysicalJoin) {
- DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+ if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
- // Only coalesce to allocatable physreg, we don't want to risk modifying
- // reserved registers.
- if (!Allocatable) {
- DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
- return false; // Not coalescable.
- }
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (LIS->hasInterval(CP.getDstReg()) &&
- LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
- return false;
- }
-
- // FIXME: Why are we skipping this test for partial copies?
- // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
- if (!CP.isPartial()) {
- const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg());
- unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
- unsigned Length = LIS->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold) {
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
-/// two virtual registers from different register classes.
-bool
-RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC) {
- unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
- // This heuristics is good enough in practice, but it's obviously not *right*.
- // 4 is a magic number that works well enough for x86, ARM, etc. It filter
- // out all but the most restrictive register classes.
- if (NewRCCount > 4 ||
- // Early exit if the function is fairly small, coalesce aggressively if
- // that's the case. For really special register classes with 3 or
- // fewer registers, be a bit more careful.
- (LIS->getFuncInstructionCount() / NewRCCount) < 8)
- return true;
- LiveInterval &SrcInt = LIS->getInterval(SrcReg);
- LiveInterval &DstInt = LIS->getInterval(DstReg);
- unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt);
- unsigned DstSize = LIS->getApproximateInstructionCount(DstInt);
-
- // Coalesce aggressively if the intervals are small compared to the number of
- // registers in the new class. The number 4 is fairly arbitrary, chosen to be
- // less aggressive than the 8 used for the whole function size.
- const unsigned ThresSize = 4 * NewRCCount;
- if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (CP.isFlipped() && JoinVInt.containsOneValue())
return true;
- // Estimate *register use density*. If it doubles or more, abort.
- unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg),
- MRI->use_nodbg_end());
- unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg),
- MRI->use_nodbg_end());
- unsigned NewUses = SrcUses + DstUses;
- unsigned NewSize = SrcSize + DstSize;
- if (SrcRC != NewRC && SrcSize > ThresSize) {
- unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
- if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
- return false;
- }
- if (DstRC != NewRC && DstSize > ThresSize) {
- unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
- if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
- return false;
- }
- return true;
+ DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ return false;
}
-
-/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
-bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
- if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
- return false; // Already done.
-
DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
- CoalescerPair CP(*TII, *TRI);
+ CoalescerPair CP(*TRI);
if (!CP.setRegisters(CopyMI)) {
DEBUG(dbgs() << "\tNot coalescable.\n");
return false;
}
- // If they are already joined we continue.
- if (CP.getSrcReg() == CP.getDstReg()) {
- markAsJoined(CopyMI);
- DEBUG(dbgs() << "\tCopy already coalesced.\n");
- return false; // Not coalescable.
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
}
// Eliminate undefs.
if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
- markAsJoined(CopyMI);
DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
return false; // Not coalescable.
}
- DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
- << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx())
- << "\n");
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return true;
+ }
// Enforce policies.
if (CP.isPhys()) {
- if (!shouldJoinPhys(CP)) {
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ << '\n');
+ if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
CP.getDstReg(), CopyMI))
return true;
return false;
}
} else {
- // Avoid constraining virtual register regclass too much.
- if (CP.isCrossClass()) {
- DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
- if (DisableCrossClassJoin) {
- DEBUG(dbgs() << "\tCross-class joins disabled.\n");
- return false;
- }
- if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
- MRI->getRegClass(CP.getSrcReg()),
- MRI->getRegClass(CP.getDstReg()),
- CP.getNewRC())) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
+ DEBUG({
+ dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
+ << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << PrintReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << PrintReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
+ << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
// When possible, let DstReg be the larger interval.
- if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
LIS->getInterval(CP.getDstReg()).ranges.size())
CP.flip();
}
@@ -1179,21 +996,22 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// Otherwise, if one of the intervals being joined is a physreg, this method
// always canonicalizes DstInt to be it. The output "SrcInt" will not have
// been modified, so we can use this information below to update aliases.
- if (!JoinIntervals(CP)) {
+ if (!joinIntervals(CP)) {
// Coalescing failed.
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
CP.getDstReg(), CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
- if (!CP.isPartial()) {
- if (AdjustCopiesBackFrom(CP, CopyMI) ||
- RemoveCopyByCommutingDef(CP, CopyMI)) {
- markAsJoined(CopyMI);
+ if (!CP.isPartial() && !CP.isPhys()) {
+ if (adjustCopiesBackFrom(CP, CopyMI) ||
+ removeCopyByCommutingDef(CP, CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1212,29 +1030,21 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
}
- // Remember to delete the copy instruction.
- markAsJoined(CopyMI);
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
- UpdateRegDefsUses(CP);
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
- // If we have extended the live range of a physical register, make sure we
- // update live-in lists as well.
- if (CP.isPhys()) {
- SmallVector<MachineBasicBlock*, 16> BlockSeq;
- // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
- // ranges for this, and they are preserved.
- LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg());
- for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
- I != E; ++I ) {
- LIS->findLiveInMBBs(I->start, I->end, BlockSeq);
- for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
- MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(CP.getDstReg()))
- block.addLiveIn(CP.getDstReg());
- }
- BlockSeq.clear();
- }
- }
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// SrcReg is guaranteed to be the register whose live interval that is
// being merged.
@@ -1244,16 +1054,51 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- LiveInterval &DstInt = LIS->getInterval(CP.getDstReg());
- dbgs() << "\tJoined. Result = ";
- DstInt.print(dbgs(), TRI);
- dbgs() << "\n";
+ dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
+ if (!CP.isPhys())
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
});
++numJoins;
return true;
}
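For orientation, a minimal caller sketch of the Again protocol (hypothetical names; copyCoalesceWorkList() further down implements this for real):

    // Copies that fail with Again=true may become joinable once nearby
    // copies have been coalesced, so keep them for another pass.
    std::vector<MachineInstr*> Retry;
    for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
      bool Again = false;
      if (!joinCopy(Copies[i], Again) && Again)
        Retry.push_back(Copies[i]);
    }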
+/// Attempt joining with a reserved physreg.
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
+
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of
+ // CP.getDstReg(). The live range of the reserved register will look like a
+ // set of dead defs - we don't properly track the live range of reserved
+ // registers.
+
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges looking like dead defs.
+ for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
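As a reading aid, a hedged sketch (hypothetical helper, not part of the patch) of the preconditions joinReservedPhysReg() relies on:

    static bool reservedJoinLooksLegal(const CoalescerPair &CP,
                                       const LiveInterval &RHS,
                                       const RegisterClassInfo &RCI) {
      // Mirrors the asserts above: a flipped physreg pair whose destination
      // is reserved and whose source interval carries exactly one value.
      return CP.isPhys() && RCI.isReserved(CP.getDstReg()) &&
             CP.isFlipped() && RHS.containsOneValue();
    }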
/// ComputeUltimateVN - Assuming we are going to join two live intervals,
/// compute what the resultant value numbers for each value in the input two
/// ranges will be. This is complicated by copies between the two which can
@@ -1320,144 +1165,70 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
const TargetRegisterInfo &tri,
CoalescerPair &CP,
VNInfo *VNI,
- LiveRange *LR,
+ VNInfo *OtherVNI,
SmallVector<MachineInstr*, 8> &DupCopies) {
// FIXME: This is very conservative. For example, we don't handle
// physical registers.
MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
- if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ if (!MI || CP.isPartial() || CP.isPhys())
return false;
- unsigned Dst = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
-
- if (!TargetRegisterInfo::isVirtualRegister(Src) ||
- !TargetRegisterInfo::isVirtualRegister(Dst))
+ unsigned A = CP.getDstReg();
+ if (!TargetRegisterInfo::isVirtualRegister(A))
return false;
- unsigned A = CP.getDstReg();
unsigned B = CP.getSrcReg();
-
- if (B == Dst)
- std::swap(A, B);
- assert(Dst == A);
-
- VNInfo *Other = LR->valno;
- const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def);
-
- if (!OtherMI || !OtherMI->isFullCopy())
+ if (!TargetRegisterInfo::isVirtualRegister(B))
return false;
- unsigned OtherDst = OtherMI->getOperand(0).getReg();
- unsigned OtherSrc = OtherMI->getOperand(1).getReg();
-
- if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
- !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
+ if (!OtherMI)
return false;
- assert(OtherDst == B);
-
- if (Src != OtherSrc)
- return false;
+ if (MI->isImplicitDef()) {
+ DupCopies.push_back(MI);
+ return true;
+ } else {
+ if (!MI->isFullCopy())
+ return false;
+ unsigned Src = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Src))
+ return false;
+ if (!OtherMI->isFullCopy())
+ return false;
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
+ return false;
- // If the copies use two different value numbers of X, we cannot merge
- // A and B.
- LiveInterval &SrcInt = li.getInterval(Src);
- // getVNInfoBefore returns NULL for undef copies. In this case, the
- // optimization is still safe.
- if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def))
- return false;
+ if (Src != OtherSrc)
+ return false;
- DupCopies.push_back(MI);
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ // getVNInfoBefore returns NULL for undef copies. In this case, the
+ // optimization is still safe.
+ if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
+ SrcInt.getVNInfoBefore(VNI->def))
+ return false;
- return true;
+ DupCopies.push_back(MI);
+ return true;
+ }
}
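Concretely, the pattern recognized here is a pair of duplicate defs: with A = COPY X defining VNI in one interval and B = COPY X (or an IMPLICIT_DEF) defining OtherVNI in the other, joining A and B makes the second def redundant, so it is queued in DupCopies for later rewriting or removal; the check that both copies read the same value number of X guards the merge.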
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// joinIntervals - Attempt to join these two intervals. On failure, this
/// returns false.
-bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
- DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; });
-
- // If a live interval is a physical register, check for interference with any
- // aliases. The interference check implemented here is a bit more conservative
- // than the full interference check below. We allow overlapping live ranges
- // only when one is a copy of the other.
- if (CP.isPhys()) {
- // Optimization for reserved registers like ESP.
- // We can only merge with a reserved physreg if RHS has a single value that
- // is a copy of CP.DstReg(). The live range of the reserved register will
- // look like a set of dead defs - we don't properly track the live range of
- // reserved registers.
- if (RegClassInfo.isReserved(CP.getDstReg())) {
- assert(CP.isFlipped() && RHS.containsOneValue() &&
- "Invalid join with reserved register");
- // Deny any overlapping intervals. This depends on all the reserved
- // register live ranges to look like dead defs.
- for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) {
- if (!LIS->hasInterval(*AS)) {
- // Make sure at least DstReg itself exists before attempting a join.
- if (*AS == CP.getDstReg())
- LIS->getOrCreateInterval(CP.getDstReg());
- continue;
- }
- if (RHS.overlaps(LIS->getInterval(*AS))) {
- DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n');
- return false;
- }
- }
- // Skip any value computations, we are not adding new values to the
- // reserved register. Also skip merging the live ranges, the reserved
- // register live range doesn't need to be accurate as long as all the
- // defs are there.
- return true;
- }
-
- // Check if a register mask clobbers DstReg.
- BitVector UsableRegs;
- if (LIS->checkRegMaskInterference(RHS, UsableRegs) &&
- !UsableRegs.test(CP.getDstReg())) {
- DEBUG(dbgs() << "\t\tRegister mask interference.\n");
- return false;
- }
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ // Handle physreg joins separately.
+ if (CP.isPhys())
+ return joinReservedPhysReg(CP);
- for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
- if (!LIS->hasInterval(*AS))
- continue;
- const LiveInterval &LHS = LIS->getInterval(*AS);
- LiveInterval::const_iterator LI = LHS.begin();
- for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
- RI != RE; ++RI) {
- LI = std::lower_bound(LI, LHS.end(), RI->start);
- // Does LHS have an overlapping live range starting before RI?
- if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
- (RI->start != RI->valno->def ||
- !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), TRI);
- dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
- });
- return false;
- }
-
- // Check that LHS ranges beginning in this range are copies.
- for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
- if (LI->start != LI->valno->def ||
- !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), TRI);
- dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
- });
- return false;
- }
- }
- }
- }
- }
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
// Compute the final value assignment, assuming that the live ranges can be
// coalesced.
@@ -1468,9 +1239,11 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
SmallVector<MachineInstr*, 8> DupCopies;
+ SmallVector<MachineInstr*, 8> DeadCopies;
LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
- DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; });
+ DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
+ << '\n');
// Loop over the value numbers of the LHS, seeing if any are defined from
// the RHS.
@@ -1481,21 +1254,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
continue;
MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
assert(MI && "Missing def");
- if (!MI->isCopyLike()) // Src not defined by a copy?
+ if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
continue;
// Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
// The copy could be to an aliased physreg.
- if (!lr) continue;
+ if (!OtherVNI)
+ continue;
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
- if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ if (CP.isCoalescable(MI))
+ DeadCopies.push_back(MI);
+ else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
+ DupCopies))
continue;
- LHSValsDefinedFromRHS[VNI] = lr->valno;
+ LHSValsDefinedFromRHS[VNI] = OtherVNI;
}
// Loop over the value numbers of the RHS, seeing if any are defined from
@@ -1507,21 +1283,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
continue;
MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
assert(MI && "Missing def");
- if (!MI->isCopyLike()) // Src not defined by a copy?
+ if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
continue;
// Figure out the value # from the LHS.
- LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
// The copy could be to an aliased physreg.
- if (!lr) continue;
+ if (!OtherVNI)
+ continue;
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
- if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ if (CP.isCoalescable(MI))
+ DeadCopies.push_back(MI);
+ else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
+ DupCopies))
continue;
- RHSValsDefinedFromLHS[VNI] = lr->valno;
+ RHSValsDefinedFromLHS[VNI] = OtherVNI;
}
LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
@@ -1563,6 +1342,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
LiveInterval::const_iterator J = RHS.begin();
LiveInterval::const_iterator JE = RHS.end();
+ // Collect interval end points that will no longer be kills.
+ SmallVector<MachineInstr*, 8> LHSOldKills;
+ SmallVector<MachineInstr*, 8> RHSOldKills;
+
// Skip ahead until the first place of potential sharing.
if (I != IE && J != JE) {
if (I->start < J->start) {
@@ -1576,20 +1359,21 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
while (I != IE && J != JE) {
// Determine if these two live ranges overlap.
- bool Overlaps;
- if (I->start < J->start) {
- Overlaps = I->end > J->start;
- } else {
- Overlaps = J->end > I->start;
- }
-
// If so, check value # info to determine if they are really different.
- if (Overlaps) {
+ if (I->end > J->start && J->end > I->start) {
// If the live range overlap will map to the same value number in the
// result liverange, we can still coalesce them. If not, we can't.
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
+
+ // Extended live ranges should no longer be killed.
+ if (!I->end.isBlock() && I->end < J->end)
+ if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
+ LHSOldKills.push_back(MI);
+ if (!J->end.isBlock() && J->end < I->end)
+ if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
+ RHSOldKills.push_back(MI);
}
if (I->end < J->end)
@@ -1616,29 +1400,48 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
NewVNInfo[RHSValID]->setHasPHIKill(true);
}
+ // Clear kill flags where live ranges are extended.
+ while (!LHSOldKills.empty())
+ LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
+ while (!RHSOldKills.empty())
+ RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);
+
if (LHSValNoAssignments.empty())
LHSValNoAssignments.push_back(-1);
if (RHSValNoAssignments.empty())
RHSValNoAssignments.push_back(-1);
+ // Now erase all the redundant copies.
+ for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
+ MachineInstr *MI = DeadCopies[i];
+ if (!ErasedInstrs.insert(MI))
+ continue;
+ DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
+ << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+
SmallVector<unsigned, 8> SourceRegisters;
for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
E = DupCopies.end(); I != E; ++I) {
MachineInstr *MI = *I;
+ if (!ErasedInstrs.insert(MI))
+ continue;
- // We have pretended that the assignment to B in
+ // If MI is a copy, then we have pretended that the assignment to B in
// A = X
// B = X
// was actually a copy from A. Now that we decided to coalesce A and B,
// transform the code into
// A = X
- // X = X
- // and mark the X as coalesced to keep the illusion.
- unsigned Src = MI->getOperand(1).getReg();
- SourceRegisters.push_back(Src);
- MI->getOperand(0).substVirtReg(Src, 0, *TRI);
-
- markAsJoined(MI);
+ // In the case of the implicit_def, we just have to remove it.
+ if (!MI->isImplicitDef()) {
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
}
// If B = X was the last use of X in a liverange, we have to shrink it now
@@ -1678,73 +1481,58 @@ namespace {
};
}
-void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<MachineInstr*> &TryAgain) {
- DEBUG(dbgs() << MBB->getName() << ":\n");
-
- SmallVector<MachineInstr*, 8> VirtCopies;
- SmallVector<MachineInstr*, 8> PhysCopies;
- SmallVector<MachineInstr*, 8> ImpDefCopies;
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E;) {
- MachineInstr *Inst = MII++;
-
- // If this isn't a copy nor a extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg;
- if (Inst->isCopy()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isSubregToReg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- } else
+// Try joining WorkList copies starting from index From.
+// Null out any successful joins.
+bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
+ assert(From <= WorkList.size() && "Out of range");
+ bool Progress = false;
+ for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
+ if (!WorkList[i])
continue;
-
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty())
- ImpDefCopies.push_back(Inst);
- else if (SrcIsPhys || DstIsPhys)
- PhysCopies.push_back(Inst);
- else
- VirtCopies.push_back(Inst);
- }
-
- // Try coalescing implicit copies and insert_subreg <undef> first,
- // followed by copies to / from physical registers, then finally copies
- // from virtual registers to virtual registers.
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = ImpDefCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = PhysCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = VirtCopies[i];
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(WorkList[i])) {
+ WorkList[i] = 0;
+ continue;
+ }
bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
+ bool Success = joinCopy(WorkList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ WorkList[i] = 0;
}
+ return Progress;
}
-void RegisterCoalescer::joinIntervals() {
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ if (copyCoalesceWorkList(PrevSize))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)0), WorkList.end());
+}
+
+void RegisterCoalescer::joinAllIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && "Old data still around.");
- std::vector<MachineInstr*> TryAgainList;
if (Loops->empty()) {
// If there are no loops in the function, join intervals in function order.
for (MachineFunction::iterator I = MF->begin(), E = MF->end();
I != E; ++I)
- CopyCoalesceInMBB(I, TryAgainList);
+ copyCoalesceInMBB(I);
} else {
// Otherwise, join intervals in inner loops before other intervals.
// Unfortunately we can't just iterate over loop hierarchy here because
@@ -1763,34 +1551,20 @@ void RegisterCoalescer::joinIntervals() {
// Finally, join intervals in loop nest order.
for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ copyCoalesceInMBB(MBBs[i].second);
}
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- bool ProgressMade = true;
- while (ProgressMade) {
- ProgressMade = false;
-
- for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
- MachineInstr *&TheCopy = TryAgainList[i];
- if (!TheCopy)
- continue;
-
- bool Again = false;
- bool Success = JoinCopy(TheCopy, Again);
- if (Success || !Again) {
- TheCopy= 0; // Mark this one as done.
- ProgressMade = true;
- }
- }
- }
+ while (copyCoalesceWorkList())
+ /* empty */ ;
}
void RegisterCoalescer::releaseMemory() {
- JoinedCopies.clear();
- ReMatCopies.clear();
- ReMatDefs.clear();
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
@@ -1814,138 +1588,11 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
RegClassInfo.runOnMachineFunction(fn);
// Join (coalesce) intervals if requested.
- if (EnableJoining) {
- joinIntervals();
- DEBUG({
- dbgs() << "********** INTERVALS POST JOINING **********\n";
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end();
- I != E; ++I){
- I->second->print(dbgs(), TRI);
- dbgs() << "\n";
- }
- });
- }
-
- // Perform a final pass over the instructions and compute spill weights
- // and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs, InflateRegs;
- for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* mbb = mbbi;
- for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
- mii != mie; ) {
- MachineInstr *MI = mii;
- if (JoinedCopies.count(MI)) {
- // Delete all coalesced copies.
- bool DoDelete = true;
- assert(MI->isCopyLike() && "Unrecognized copy instruction");
- unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
- unsigned DstReg = MI->getOperand(0).getReg();
-
- // Collect candidates for register class inflation.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
- InflateRegs.push_back(SrcReg);
- if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
- InflateRegs.push_back(DstReg);
-
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- MI->getNumOperands() > 2)
- // Do not delete extract_subreg, insert_subreg of physical
- // registers unless the definition is dead. e.g.
- // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
- // or else the scavenger may complain. LowerSubregs will
- // delete them later.
- DoDelete = false;
-
- if (MI->allDefsAreDead()) {
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- LIS->hasInterval(SrcReg))
- LIS->shrinkToUses(&LIS->getInterval(SrcReg));
- DoDelete = true;
- }
- if (!DoDelete) {
- // We need the instruction to adjust liveness, so make it a KILL.
- if (MI->isSubregToReg()) {
- MI->RemoveOperand(3);
- MI->RemoveOperand(1);
- }
- MI->setDesc(TII->get(TargetOpcode::KILL));
- mii = llvm::next(mii);
- } else {
- LIS->RemoveMachineInstrFromMaps(MI);
- mii = mbbi->erase(mii);
- ++numPeep;
- }
- continue;
- }
-
- // Now check if this is a remat'ed def instruction which is now dead.
- if (ReMatDefs.count(MI)) {
- bool isDead = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- DeadDefs.push_back(Reg);
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- // Remat may also enable register class inflation.
- if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
- InflateRegs.push_back(Reg);
- }
- if (MO.isDead())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->use_nodbg_empty(Reg)) {
- isDead = false;
- break;
- }
- }
- if (isDead) {
- while (!DeadDefs.empty()) {
- unsigned DeadDef = DeadDefs.back();
- DeadDefs.pop_back();
- RemoveDeadDef(LIS->getInterval(DeadDef), MI);
- }
- LIS->RemoveMachineInstrFromMaps(mii);
- mii = mbbi->erase(mii);
- continue;
- } else
- DeadDefs.clear();
- }
-
- ++mii;
-
- // Check for now unnecessary kill flags.
- if (LIS->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
- unsigned reg = MO.getReg();
- if (!reg || !LIS->hasInterval(reg)) continue;
- if (!LIS->getInterval(reg).killedAt(DefIdx)) {
- MO.setIsKill(false);
- continue;
- }
- // When leaving a kill flag on a physreg, check if any subregs should
- // remain alive.
- if (!TargetRegisterInfo::isPhysicalRegister(reg))
- continue;
- for (const uint16_t *SR = TRI->getSubRegisters(reg);
- unsigned S = *SR; ++SR)
- if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
- MI->addRegisterDefined(S, TRI);
- }
- }
- }
+ if (EnableJoining)
+ joinAllIntervals();
// After deleting a lot of copies, register classes may be less constrained.
- // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
// DPR inflation.
array_pod_sort(InflateRegs.begin(), InflateRegs.end());
InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 310b933..8a6df98 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -26,7 +26,6 @@ namespace llvm {
/// two registers can be coalesced, CoalescerPair can determine if a copy
/// instruction would become an identity copy after coalescing.
class CoalescerPair {
- const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
/// DstReg - The register that will be left after coalescing. It can be a
@@ -36,10 +35,13 @@ namespace llvm {
/// SrcReg - the virtual register that will be coalesced into dstReg.
unsigned SrcReg;
- /// subReg_ - The subregister index of srcReg in DstReg. It is possible the
- /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a
- /// virtual register.
- unsigned SubIdx;
+ /// DstIdx - The sub-register index of the old DstReg in the new coalesced
+ /// register.
+ unsigned DstIdx;
+
+ /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
+ /// register.
+ unsigned SrcIdx;
/// Partial - True when the original copy was a partial subregister copy.
bool Partial;
@@ -52,12 +54,13 @@ namespace llvm {
bool Flipped;
/// NewRC - The register class of the coalesced register, or NULL if DstReg
- /// is a physreg.
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
const TargetRegisterClass *NewRC;
public:
- CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
- : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0),
+ CoalescerPair(const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
/// setRegisters - set registers to match the copy instruction MI. Return
@@ -94,9 +97,13 @@ namespace llvm {
/// getSrcReg - Return the virtual register that will be coalesced away.
unsigned getSrcReg() const { return SrcReg; }
- /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
- /// coalesced into, or 0.
- unsigned getSubIdx() const { return SubIdx; }
+ /// getDstIdx - Return the subregister index that DstReg will be coalesced
+ /// into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
+ /// into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
/// getNewRC - Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
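A minimal usage sketch of the revised interface (hypothetical; MI is assumed to be a copy-like MachineInstr):

    CoalescerPair CP(*TRI);
    if (CP.setRegisters(MI) && !CP.isPhys()) {
      // Both indexes are 0 for a plain full-register copy; for sub-register
      // copies they locate SrcReg and DstReg inside the coalesced register.
      unsigned SrcSub = CP.getSrcIdx();
      unsigned DstSub = CP.getDstIdx();
      (void)SrcSub; (void)DstSub;
    }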
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 0000000..43448c8
--- /dev/null
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,841 @@
+//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// Increase register pressure for each set impacted by this register class.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ std::vector<unsigned> &MaxSetPressure,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ for (const int *PSet = TRI->getRegClassPressureSets(RC);
+ *PSet != -1; ++PSet) {
+ CurrSetPressure[*PSet] += Weight;
+ if (&CurrSetPressure != &MaxSetPressure
+ && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
+ MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
+ }
+ }
+}
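For illustration (numbers invented): if a register class maps to pressure sets {0, 3} with a RegWeight of 1, one call adds 1 to CurrSetPressure[0] and CurrSetPressure[3], and each corresponding MaxSetPressure entry is raised only when the new current value exceeds it.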
+
+/// Decrease register pressure for each set impacted by this register class.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ for (const int *PSet = TRI->getRegClassPressureSets(RC);
+ *PSet != -1; ++PSet) {
+ assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSet] -= Weight;
+ }
+}
+
+/// Directly increase pressure only within this RegisterPressure result.
+void RegisterPressure::increase(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+}
+
+/// Directly decrease pressure only within this RegisterPressure result.
+void RegisterPressure::decrease(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ decreaseSetPressure(MaxSetPressure, RC, TRI);
+}
+
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+ dbgs() << "Live In: ";
+ for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
+ if (MaxSetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
+ << '\n';
+ }
+}
+
+/// Increase the current pressure as impacted by these physical registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getMinimalPhysRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these physical
+/// registers.
+void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
+ TRI);
+}
+
+/// Increase the current pressure as impacted by these virtual registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these virtual registers.
+void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is above the next index, open it. We happen to need
+/// the SlotIndex of the next top for the pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is at or below the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveOutRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveOutRegs.clear();
+}
+
+/// Setup the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos)
+{
+ MF = mf;
+ TRI = MF->getTarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue())
+ ++CurrPos;
+
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+ P.MaxSetPressure = CurrSetPressure;
+
+ LivePhysRegs.clear();
+ LivePhysRegs.setUniverse(TRI->getNumRegs());
+ LiveVirtRegs.clear();
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+}
+
+/// Does this pressure result have a valid top position and live ins.
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs.
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx =
+ LIS->getInstructionIndex(CurrPos).getRegSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
+ P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ P.LiveInRegs.push_back(*I);
+ std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
+ P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
+ P.LiveInRegs.end());
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ if (CurrPos == MBB->end())
+ static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB);
+ else
+ static_cast<IntervalPressure&>(P).BottomIdx =
+ LIS->getInstructionIndex(CurrPos).getRegSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
+ P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ P.LiveOutRegs.push_back(*I);
+ std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
+ P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
+ P.LiveOutRegs.end());
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LivePhysRegs.empty() && LiveVirtRegs.empty() &&
+ "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// Return true if Reg aliases a register in Regs SparseSet.
+static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs");
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (Regs.count(*AI))
+ return true;
+ return false;
+}
+
+/// Find a register in the unsorted Regs SmallVector that aliases Reg,
+/// returning an iterator to it, or Regs.end() if there is none.
+/// This is only valid for physical registers.
+static SmallVectorImpl<unsigned>::iterator
+findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SmallVectorImpl<unsigned>::iterator I =
+ std::find(Regs.begin(), Regs.end(), *AI);
+ if (I != Regs.end())
+ return I;
+ }
+ return Regs.end();
+}
+
+/// Find Reg in the Regs SmallVector, returning an iterator to the matching
+/// entry or Regs.end(). For virtual registers, do a linear search; for
+/// physical registers, check for aliases.
+static SmallVectorImpl<unsigned>::iterator
+findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ if (isVReg)
+ return std::find(Regs.begin(), Regs.end(), Reg);
+ return findRegAlias(Reg, Regs, TRI);
+}
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+template<bool isVReg>
+struct RegisterOperands {
+ SmallVector<unsigned, 8> Uses;
+ SmallVector<unsigned, 8> Defs;
+ SmallVector<unsigned, 8> DeadDefs;
+
+ /// Push this operand's register onto the correct vector.
+ void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) {
+ if (MO.readsReg()) {
+ if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end())
+ Uses.push_back(MO.getReg());
+ }
+ if (MO.isDef()) {
+ if (MO.isDead()) {
+ if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
+ DeadDefs.push_back(MO.getReg());
+ }
+ else {
+ if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
+ Defs.push_back(MO.getReg());
+ }
+ }
+ }
+};
+typedef RegisterOperands<false> PhysRegOperands;
+typedef RegisterOperands<true> VirtRegOperands;
+
+/// Collect physical and virtual register operands.
+static void collectOperands(const MachineInstr *MI,
+ PhysRegOperands &PhysRegOpers,
+ VirtRegOperands &VirtRegOpers,
+ const TargetRegisterInfo *TRI,
+ const RegisterClassInfo *RCI) {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
+ const MachineOperand &MO = *OperI;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegOpers.collect(MO, TRI);
+ else if (RCI->isAllocatable(MO.getReg()))
+ PhysRegOpers.collect(MO, TRI);
+ }
+ // Remove redundant physreg dead defs.
+ for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) {
+ unsigned Reg = PhysRegOpers.DeadDefs[i-1];
+ if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end())
+ PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]);
+ }
+}
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[i])) {
+ if (LiveVirtRegs.insert(Regs[i]).second)
+ increaseVirtRegPressure(Regs[i]);
+ }
+ else {
+ if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) {
+ LivePhysRegs.insert(Regs[i]);
+ increasePhysRegPressure(Regs[i]);
+ }
+ }
+ }
+}
+
+/// Add PhysReg to the live in set and increase max pressure.
+void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) {
+ assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end())
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+}
+
+/// Add PhysReg to the live out set and increase max pressure.
+void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) {
+ assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end())
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+}
+
+/// Add VirtReg to the live in set and increase max pressure.
+void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) {
+ assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
+ P.LiveInRegs.end())
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(MRI->getRegClass(Reg), TRI);
+}
+
+/// Add VirtReg to the live out set and increase max pressure.
+void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) {
+ assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) !=
+ P.LiveOutRegs.end())
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(MRI->getRegClass(Reg), TRI);
+}
+
+/// Recede across the previous instruction.
+bool RegPressureTracker::recede() {
+ // Check for the top of the analyzable region.
+ if (CurrPos == MBB->begin()) {
+ closeRegion();
+ return false;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ if (CurrPos->isDebugValue()) {
+ closeRegion();
+ return false;
+ }
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Defs[i];
+ if (LivePhysRegs.erase(Reg))
+ decreasePhysRegPressure(Reg);
+ else
+ discoverPhysLiveOut(Reg);
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Defs[i];
+ if (LiveVirtRegs.erase(Reg))
+ decreaseVirtRegPressure(Reg);
+ else
+ discoverVirtLiveOut(Reg);
+ }
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
+ increasePhysRegPressure(Reg);
+ LivePhysRegs.insert(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (!LiveVirtRegs.count(Reg)) {
+ // Adjust liveouts if LiveIntervals are available.
+ if (RequireIntervals) {
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ if (!LI->killedAt(SlotIdx))
+ discoverVirtLiveOut(Reg);
+ }
+ increaseVirtRegPressure(Reg);
+ LiveVirtRegs.insert(Reg);
+ }
+ }
+ return true;
+}
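A minimal driver sketch (hypothetical; MF, RCI, LIS, and MBB are assumed to be in scope):

    IntervalPressure RP;
    RegPressureTracker RPTracker(RP);
    RPTracker.init(MF, RCI, LIS, MBB, MBB->end());
    // Walk bottom-up; recede() returns false at the region top.
    while (RPTracker.recede()) {
      // Pressure now reflects liveness above the just-visited instruction.
    }
    // RP.MaxSetPressure, RP.LiveInRegs, and RP.LiveOutRegs summarize the
    // block once both boundaries are closed.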
+
+/// Advance across the current instruction.
+bool RegPressureTracker::advance() {
+ // Check for the bottom of the analyzable region.
+ if (CurrPos == MBB->end()) {
+ closeRegion();
+ return false;
+ }
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Kill liveness at last uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI))
+ discoverPhysLiveIn(Reg);
+ else {
+ // Allocatable physregs are always single-use before regalloc.
+ decreasePhysRegPressure(Reg);
+ LivePhysRegs.erase(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (RequireIntervals) {
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ if (LI->killedAt(SlotIdx)) {
+ if (LiveVirtRegs.erase(Reg))
+ decreaseVirtRegPressure(Reg);
+ else
+ discoverVirtLiveIn(Reg);
+ }
+ }
+ else if (!LiveVirtRegs.count(Reg)) {
+ discoverVirtLiveIn(Reg);
+ increaseVirtRegPressure(Reg);
+ }
+ }
+
+ // Generate liveness for defs.
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Defs[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
+ increasePhysRegPressure(Reg);
+ LivePhysRegs.insert(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Defs[i];
+ if (LiveVirtRegs.insert(Reg).second)
+ increaseVirtRegPressure(Reg);
+ }
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Find the next instruction.
+ do
+ ++CurrPos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ return true;
+}
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const TargetRegisterInfo *TRI) {
+ int ExcessUnits = 0;
+ unsigned PSetID = ~0U;
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ }
+ else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (std::abs(PDiff) > std::abs(ExcessUnits)) {
+ ExcessUnits = PDiff;
+ PSetID = i;
+ }
+ }
+ Delta.Excess.PSetID = PSetID;
+ Delta.Excess.UnitIncrease = ExcessUnits;
+}
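A worked example of the clamping (invented numbers, Limit = 8): going from POld = 7 to PNew = 10 yields PDiff = 2, since only the units beyond the limit count; going from POld = 10 to PNew = 7 yields PDiff = -2, the excess just relieved; a change entirely below the limit contributes 0.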
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureElement();
+ Delta.CurrentMax = PressureElement();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (PDiff > Delta.CriticalMax.UnitIncrease) {
+ Delta.CriticalMax.PSetID = i;
+ Delta.CriticalMax.UnitIncrease = PDiff;
+ }
+ }
+
+ // Find the greatest increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ int MDiff = (int)PNew - (int)MaxPressureLimit[i];
+ if (MDiff > Delta.CurrentMax.UnitIncrease) {
+ Delta.CurrentMax.PSetID = i;
+ Delta.CurrentMax.UnitIncrease = PNew;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ // Account for register pressure similar to RegPressureTracker::recede().
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Boost max pressure for all dead defs together. Since CurrSetPressure and
+ // MaxSetPressure are updated as a pair, raising the dead defs and lowering
+ // them again leaves the current pressure unchanged while recording the
+ // transient spike in the max.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ decreasePhysRegPressure(PhysRegOpers.Defs);
+ decreaseVirtRegPressure(VirtRegOpers.Defs);
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI))
+ increasePhysRegPressure(Reg);
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (!LiveVirtRegs.count(Reg))
+ increaseVirtRegPressure(Reg);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// FIXME: This is expensive for an on-the-fly query. We need to cache the
+/// result per-SUnit with enough information to adjust for the current
+/// scheduling position. But this works as a proof of concept.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
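A hedged query sketch from a bottom-up scheduler (hypothetical names; MI, CriticalPSets, and MaxPressureLimit are supplied by the caller):

    RegPressureDelta Delta;
    RPTracker.getMaxUpwardPressureDelta(MI, Delta, CriticalPSets,
                                        MaxPressureLimit);
    if (Delta.Excess.UnitIncrease > 0) {
      // Scheduling MI here pushes set Delta.Excess.PSetID further beyond
      // its limit; a scheduler may prefer another candidate.
    }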
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+static bool findUseBetween(unsigned Reg,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
+ }
+ return false;
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ // Account for register pressure similar to RegPressureTracker::advance().
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Kill liveness at last uses. Assume allocatable physregs are single-use
+ // rather than checking LiveIntervals.
+ decreasePhysRegPressure(PhysRegOpers.Uses);
+ if (RequireIntervals) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ // FIXME: allow the caller to pass in the list of vreg uses that remain to
+ // be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ if (LI->killedAt(SlotIdx)
+ && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ decreaseVirtRegPressure(Reg);
+ }
+ }
+ }
+
+ // Generate liveness for defs.
+ increasePhysRegPressure(PhysRegOpers.Defs);
+ increaseVirtRegPressure(VirtRegOpers.Defs);
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
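All four query methods share one snapshot/swap idiom: copy the state once, mutate it, then swap so the caller receives the mutated values and the tracker gets its originals back with no second copy. A sketch under the assumption that the mutator works in place (names hypothetical):

#include <vector>

// Speculatively mutate State, return the mutated values in Result, and
// restore the original State via swap rather than a second copy.
static void queryAndRestore(std::vector<unsigned> &State,
                            std::vector<unsigned> &Result,
                            void (*Mutate)(std::vector<unsigned> &)) {
  Result = State;     // snapshot the pre-query state
  Mutate(State);      // e.g. a bumpUpwardPressure-style update
  State.swap(Result); // Result: post-mutation values; State: restored
}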
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 03bd82e..d673794 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -37,16 +37,13 @@ using namespace llvm;
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- RegsAvailable.reset(SubReg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RegsAvailable.reset(*SubRegs);
}
bool RegScavenger::isAliasUsed(unsigned Reg) const {
- if (isUsed(Reg))
- return true;
- for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R)
- if (isUsed(*R))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (isUsed(*AI))
return true;
return false;
}
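The new iterator idiom generalizes to any alias walk. A sketch against the MCRegAliasIterator interface used in this patch (the helper name is hypothetical):

#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"

// Clear Reg and everything aliasing it. IncludeSelf=true makes the
// iterator visit Reg itself, so no separate reset(Reg) is needed.
static void clearRegAndAliases(llvm::BitVector &BV, unsigned Reg,
                               const llvm::MCRegisterInfo *MCRI) {
  for (llvm::MCRegAliasIterator AI(Reg, MCRI, /*IncludeSelf=*/true);
       AI.isValid(); ++AI)
    BV.reset(*AI);
}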
@@ -114,8 +111,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(Reg);
- for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
- BV.set(*R);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ BV.set(*SubRegs);
}
void RegScavenger::forward() {
@@ -195,9 +192,8 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- if (isUsed(SubReg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isUsed(*SubRegs)) {
SubUsed = true;
break;
}
@@ -296,9 +292,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
isVirtKillInsn = true;
continue;
}
- Candidates.reset(MO.getReg());
- for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++)
- Candidates.reset(*R);
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
}
// If we're not in a virtual reg's live range, this is a valid
// restore point.
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
deleted file mode 100644
index 6020908..0000000
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ /dev/null
@@ -1,1013 +0,0 @@
-//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "rendermf"
-
-#include "RenderMachineFunction.h"
-
-#include "VirtRegMap.h"
-
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <sstream>
-
-using namespace llvm;
-
-char RenderMachineFunction::ID = 0;
-INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
- "Render machine functions (and related info) to HTML pages",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
- "Render machine functions (and related info) to HTML pages",
- false, false)
-
-static cl::opt<std::string>
-outputFileSuffix("rmf-file-suffix",
- cl::desc("Appended to function name to get output file name "
- "(default: \".html\")"),
- cl::init(".html"), cl::Hidden);
-
-static cl::opt<std::string>
-machineFuncsToRender("rmf-funcs",
- cl::desc("Comma separated list of functions to render"
- ", or \"*\"."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<std::string>
-pressureClasses("rmf-classes",
- cl::desc("Register classes to render pressure for."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<std::string>
-showIntervals("rmf-intervals",
- cl::desc("Live intervals to show alongside code."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<bool>
-filterEmpty("rmf-filter-empty-intervals",
- cl::desc("Don't display empty intervals."),
- cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
-showEmptyIndexes("rmf-empty-indexes",
- cl::desc("Render indexes not associated with instructions or "
- "MBB starts."),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-useFancyVerticals("rmf-fancy-verts",
- cl::desc("Use SVG for vertical text."),
- cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
-prettyHTML("rmf-pretty-html",
- cl::desc("Pretty print HTML. For debugging the renderer only.."),
- cl::init(false), cl::Hidden);
-
-
-namespace llvm {
-
- bool MFRenderingOptions::renderingOptionsProcessed;
- std::set<std::string> MFRenderingOptions::mfNamesToRender;
- bool MFRenderingOptions::renderAllMFs = false;
-
- std::set<std::string> MFRenderingOptions::classNamesToRender;
- bool MFRenderingOptions::renderAllClasses = false;
-
- std::set<std::pair<unsigned, unsigned> >
- MFRenderingOptions::intervalNumsToRender;
- unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
-
- template <typename OutputItr>
- void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
- OutputItr outItr) {
- std::string::const_iterator curPos = s.begin();
- std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
- while (nextComa != s.end()) {
- std::string elem;
- std::copy(curPos, nextComa, std::back_inserter(elem));
- *outItr = elem;
- ++outItr;
- curPos = llvm::next(nextComa);
- nextComa = std::find(curPos, s.end(), ',');
- }
-
- if (curPos != s.end()) {
- std::string elem;
- std::copy(curPos, s.end(), std::back_inserter(elem));
- *outItr = elem;
- ++outItr;
- }
- }
-
- void MFRenderingOptions::processOptions() {
- if (!renderingOptionsProcessed) {
- processFuncNames();
- processRegClassNames();
- processIntervalNumbers();
- renderingOptionsProcessed = true;
- }
- }
-
- void MFRenderingOptions::processFuncNames() {
- if (machineFuncsToRender == "*") {
- renderAllMFs = true;
- } else {
- splitComaSeperatedList(machineFuncsToRender,
- std::inserter(mfNamesToRender,
- mfNamesToRender.begin()));
- }
- }
-
- void MFRenderingOptions::processRegClassNames() {
- if (pressureClasses == "*") {
- renderAllClasses = true;
- } else {
- splitComaSeperatedList(pressureClasses,
- std::inserter(classNamesToRender,
- classNamesToRender.begin()));
- }
- }
-
- void MFRenderingOptions::processIntervalNumbers() {
- std::set<std::string> intervalRanges;
- splitComaSeperatedList(showIntervals,
- std::inserter(intervalRanges,
- intervalRanges.begin()));
- std::for_each(intervalRanges.begin(), intervalRanges.end(),
- processIntervalRange);
- }
-
- void MFRenderingOptions::processIntervalRange(
- const std::string &intervalRangeStr) {
- if (intervalRangeStr == "*") {
- intervalTypesToRender |= All;
- } else if (intervalRangeStr == "virt-nospills*") {
- intervalTypesToRender |= VirtNoSpills;
- } else if (intervalRangeStr == "spills*") {
- intervalTypesToRender |= VirtSpills;
- } else if (intervalRangeStr == "virt*") {
- intervalTypesToRender |= AllVirt;
- } else if (intervalRangeStr == "phys*") {
- intervalTypesToRender |= AllPhys;
- } else {
- std::istringstream iss(intervalRangeStr);
- unsigned reg1, reg2;
- if ((iss >> reg1 >> std::ws)) {
- if (iss.eof()) {
- intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
- } else {
- char c;
- iss >> c;
- if (c == '-' && (iss >> reg2)) {
- intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
- } else {
- dbgs() << "Warning: Invalid interval range \""
- << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
- }
- }
- } else {
- dbgs() << "Warning: Invalid interval number \""
- << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
- }
- }
- }
-
- void MFRenderingOptions::setup(MachineFunction *mf,
- const TargetRegisterInfo *tri,
- LiveIntervals *lis,
- const RenderMachineFunction *rmf) {
- this->mf = mf;
- this->tri = tri;
- this->lis = lis;
- this->rmf = rmf;
-
- clear();
- }
-
- void MFRenderingOptions::clear() {
- regClassesTranslatedToCurrentFunction = false;
- regClassSet.clear();
-
- intervalsTranslatedToCurrentFunction = false;
- intervalSet.clear();
- }
-
- void MFRenderingOptions::resetRenderSpecificOptions() {
- intervalSet.clear();
- intervalsTranslatedToCurrentFunction = false;
- }
-
- bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
- processOptions();
-
- return (renderAllMFs ||
- mfNamesToRender.find(mf->getFunction()->getName()) !=
- mfNamesToRender.end());
- }
-
- const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
- translateRegClassNamesToCurrentFunction();
- return regClassSet;
- }
-
- const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
- translateIntervalNumbersToCurrentFunction();
- return intervalSet;
- }
-
- bool MFRenderingOptions::renderEmptyIndexes() const {
- return showEmptyIndexes;
- }
-
- bool MFRenderingOptions::fancyVerticals() const {
- return useFancyVerticals;
- }
-
- void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
- if (!regClassesTranslatedToCurrentFunction) {
- processOptions();
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- if (renderAllClasses ||
- classNamesToRender.find(trc->getName()) !=
- classNamesToRender.end()) {
- regClassSet.insert(trc);
- }
- }
- regClassesTranslatedToCurrentFunction = true;
- }
- }
-
- void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
- if (!intervalsTranslatedToCurrentFunction) {
- processOptions();
-
- // If we're not just doing explicit then do a copy over all matching
- // types.
- if (intervalTypesToRender != ExplicitOnly) {
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (filterEmpty && li->empty())
- continue;
-
- if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
- (intervalTypesToRender & AllPhys))) {
- intervalSet.insert(li);
- } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
- if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
- ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
- intervalSet.insert(li);
- }
- }
- }
- }
-
- // If we need to process the explicit list...
- if (intervalTypesToRender != All) {
- for (std::set<std::pair<unsigned, unsigned> >::const_iterator
- regRangeItr = intervalNumsToRender.begin(),
- regRangeEnd = intervalNumsToRender.end();
- regRangeItr != regRangeEnd; ++regRangeItr) {
- const std::pair<unsigned, unsigned> &range = *regRangeItr;
- for (unsigned reg = range.first; reg != range.second; ++reg) {
- if (lis->hasInterval(reg)) {
- intervalSet.insert(&lis->getInterval(reg));
- }
- }
- }
- }
-
- intervalsTranslatedToCurrentFunction = true;
- }
- }
-
- // ---------- TargetRegisterExtraInformation implementation ----------
-
- TargetRegisterExtraInfo::TargetRegisterExtraInfo()
- : mapsPopulated(false) {
- }
-
- void TargetRegisterExtraInfo::setup(MachineFunction *mf,
- MachineRegisterInfo *mri,
- const TargetRegisterInfo *tri,
- LiveIntervals *lis) {
- this->mf = mf;
- this->mri = mri;
- this->tri = tri;
- this->lis = lis;
- }
-
- void TargetRegisterExtraInfo::reset() {
- if (!mapsPopulated) {
- initWorst();
- //initBounds();
- initCapacity();
- mapsPopulated = true;
- }
-
- resetPressureAndLiveStates();
- }
-
- void TargetRegisterExtraInfo::clear() {
- prWorst.clear();
- vrWorst.clear();
- capacityMap.clear();
- pressureMap.clear();
- //liveStatesMap.clear();
- mapsPopulated = false;
- }
-
- void TargetRegisterExtraInfo::initWorst() {
- assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
- "Worst map already initialised?");
-
- // Start with the physical registers.
- for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
- WorstMapLine &pregLine = prWorst[preg];
-
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
-
- unsigned numOverlaps = 0;
- for (TargetRegisterClass::iterator rItr = trc->begin(),
- rEnd = trc->end();
- rItr != rEnd; ++rItr) {
- unsigned trcPReg = *rItr;
- if (tri->regsOverlap(preg, trcPReg))
- ++numOverlaps;
- }
-
- pregLine[trc] = numOverlaps;
- }
- }
-
- // Now the register classes.
- for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rc1Itr != rcEnd; ++rc1Itr) {
- const TargetRegisterClass *trc1 = *rc1Itr;
- WorstMapLine &classLine = vrWorst[trc1];
-
- for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
- rc2Itr != rcEnd; ++rc2Itr) {
- const TargetRegisterClass *trc2 = *rc2Itr;
-
- unsigned worst = 0;
-
- for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
- trc1End = trc1->end();
- trc1Itr != trc1End; ++trc1Itr) {
- unsigned trc1Reg = *trc1Itr;
- unsigned trc1RegWorst = 0;
-
- for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
- trc2End = trc2->end();
- trc2Itr != trc2End; ++trc2Itr) {
- unsigned trc2Reg = *trc2Itr;
- if (tri->regsOverlap(trc1Reg, trc2Reg))
- ++trc1RegWorst;
- }
- if (trc1RegWorst > worst) {
- worst = trc1RegWorst;
- }
- }
-
- if (worst != 0) {
- classLine[trc2] = worst;
- }
- }
- }
- }
-
- unsigned TargetRegisterExtraInfo::getWorst(
- unsigned reg,
- const TargetRegisterClass *trc) const {
- const WorstMapLine *wml = 0;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- PRWorstMap::const_iterator prwItr = prWorst.find(reg);
- assert(prwItr != prWorst.end() && "Missing prWorst entry.");
- wml = &prwItr->second;
- } else {
- const TargetRegisterClass *regTRC = mri->getRegClass(reg);
- VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
- assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
- wml = &vrwItr->second;
- }
-
- WorstMapLine::const_iterator wmlItr = wml->find(trc);
- if (wmlItr == wml->end())
- return 0;
-
- return wmlItr->second;
- }
-
- void TargetRegisterExtraInfo::initCapacity() {
- assert(!mapsPopulated && capacityMap.empty() &&
- "Capacity map already initialised?");
-
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- unsigned capacity = trc->getRawAllocationOrder(*mf).size();
-
- if (capacity != 0)
- capacityMap[trc] = capacity;
- }
- }
-
- unsigned TargetRegisterExtraInfo::getCapacity(
- const TargetRegisterClass *trc) const {
- CapacityMap::const_iterator cmItr = capacityMap.find(trc);
- assert(cmItr != capacityMap.end() &&
- "vreg with unallocable register class");
- return cmItr->second;
- }
-
- void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
- pressureMap.clear();
- //liveStatesMap.clear();
-
- // Iterate over all slots.
-
-
- // Iterate over all live intervals.
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isPhysicalRegister(li->reg))
- continue;
-
- // For all ranges in the current interval.
- for (LiveInterval::iterator lrItr = li->begin(),
- lrEnd = li->end();
- lrItr != lrEnd; ++lrItr) {
- LiveRange *lr = &*lrItr;
-
- // For all slots in the current range.
- for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
-
- // Record increased pressure at index for all overlapping classes.
- for (TargetRegisterInfo::regclass_iterator
- rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
-
- if (trc->getRawAllocationOrder(*mf).empty())
- continue;
-
- unsigned worstAtI = getWorst(li->reg, trc);
-
- if (worstAtI != 0) {
- pressureMap[i][trc] += worstAtI;
- }
- }
- }
- }
- }
- }
-
- unsigned TargetRegisterExtraInfo::getPressureAtSlot(
- const TargetRegisterClass *trc,
- SlotIndex i) const {
- PressureMap::const_iterator pmItr = pressureMap.find(i);
- if (pmItr == pressureMap.end())
- return 0;
- const PressureMapLine &pmLine = pmItr->second;
- PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
- if (pmlItr == pmLine.end())
- return 0;
- return pmlItr->second;
- }
-
- bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
- const TargetRegisterClass *trc,
- SlotIndex i) const {
- return (getPressureAtSlot(trc, i) > getCapacity(trc));
- }
-
- // ---------- MachineFunctionRenderer implementation ----------
-
- void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
- if (!prettyHTML)
- return;
- for (unsigned i = 0; i < ns; ++i) {
- os << " ";
- }
- }
-
- RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
- return Spacer(ns);
- }
-
- raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
- s.print(os);
- return os;
- }
-
- template <typename Iterator>
- std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
- std::string r;
-
- for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
- char c = *sItr;
-
- switch (c) {
- case '<': r.append("&lt;"); break;
- case '>': r.append("&gt;"); break;
- case '&': r.append("&amp;"); break;
- case ' ': r.append("&nbsp;"); break;
- case '\"': r.append("&quot;"); break;
- default: r.push_back(c); break;
- }
- }
-
- return r;
- }
-
- RenderMachineFunction::LiveState
- RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
- SlotIndex i) const {
- const MachineInstr *mi = sis->getInstructionFromIndex(i);
-
- // For uses/defs recorded use/def indexes override current liveness and
- // instruction operands (Only for the interval which records the indexes).
- // FIXME: This is all wrong, uses and defs share the same slots.
- if (i.isEarlyClobber() || i.isRegister()) {
- UseDefs::const_iterator udItr = useDefs.find(li);
- if (udItr != useDefs.end()) {
- const SlotSet &slotSet = udItr->second;
- if (slotSet.count(i)) {
- if (i.isEarlyClobber()) {
- return Used;
- }
- // else
- return Defined;
- }
- }
- }
-
- // If the slot is a load/store, or there's no info in the use/def set then
- // use liveness and instruction operand info.
- if (li->liveAt(i)) {
-
- if (mi == 0) {
- if (vrm == 0 ||
- (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
- return AliveReg;
- } else {
- return AliveStack;
- }
- } else {
- if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
- return Defined;
- } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
- return Used;
- } else {
- if (vrm == 0 ||
- (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
- return AliveReg;
- } else {
- return AliveStack;
- }
- }
- }
- }
- return Dead;
- }
-
- RenderMachineFunction::PressureState
- RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
- SlotIndex i) const {
- if (trei.getPressureAtSlot(trc, i) == 0) {
- return Zero;
- } else if (trei.classOverCapacityAtSlot(trc, i)){
- return High;
- }
- return Low;
- }
-
- /// \brief Render a machine instruction.
- void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
- const MachineInstr *mi) const {
- std::string s;
- raw_string_ostream oss(s);
- oss << *mi;
-
- os << escapeChars(oss.str());
- }
-
- template <typename T>
- void RenderMachineFunction::renderVertical(const Spacer &indent,
- raw_ostream &os,
- const T &t) const {
- if (ro.fancyVerticals()) {
- os << indent << "<object\n"
- << indent + s(2) << "class=\"obj\"\n"
- << indent + s(2) << "type=\"image/svg+xml\"\n"
- << indent + s(2) << "width=\"14px\"\n"
- << indent + s(2) << "height=\"55px\"\n"
- << indent + s(2) << "data=\"data:image/svg+xml,\n"
- << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
- << indent + s(6) << "<text x='-55' y='10' "
- "font-family='Courier' font-size='12' "
- "transform='rotate(-90)' "
- "text-rendering='optimizeSpeed' "
- "fill='#000'>" << t << "</text>\n"
- << indent + s(4) << "</svg>\">\n"
- << indent << "</object>\n";
- } else {
- std::ostringstream oss;
- oss << t;
- std::string tStr(oss.str());
-
- os << indent;
- for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
- tStrItr != tStrEnd; ++tStrItr) {
- os << *tStrItr << "<br/>";
- }
- os << "\n";
- }
- }
-
- void RenderMachineFunction::insertCSS(const Spacer &indent,
- raw_ostream &os) const {
- os << indent << "<style type=\"text/css\">\n"
- << indent + s(2) << "body { font-color: black; }\n"
- << indent + s(2) << "table.code td { font-family: monospace; "
- "border-width: 0px; border-style: solid; "
- "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
- << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
- << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
- << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
- << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
- << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
- << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
- << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
- << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
- << indent + s(2) << "table.code th { border-width: 0px; "
- "border-style: solid; }\n"
- << indent << "</style>\n";
- }
-
- void RenderMachineFunction::renderFunctionSummary(
- const Spacer &indent, raw_ostream &os,
- const char * const renderContextStr) const {
- os << indent << "<h1>Function: " << mf->getFunction()->getName()
- << "</h1>\n"
- << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
- }
-
-
- void RenderMachineFunction::renderPressureTableLegend(
- const Spacer &indent,
- raw_ostream &os) const {
- os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
- << indent << "<table class=\"code\">\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<th>Pressure</th><th>Description</th>"
- "<th>Appearance</th>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>No Pressure</td>"
- "<td>No physical registers of this class requested.</td>"
- "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>Low Pressure</td>"
- "<td>Sufficient physical registers to meet demand.</td>"
- "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>High Pressure</td>"
- "<td>Potentially insufficient physical registers to meet demand.</td>"
- "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent << "</table>\n";
- }
-
- template <typename CellType>
- void RenderMachineFunction::renderCellsWithRLE(
- const Spacer &indent, raw_ostream &os,
- const std::pair<CellType, unsigned> &rleAccumulator,
- const std::map<CellType, std::string> &cellTypeStrs) const {
-
- if (rleAccumulator.second == 0)
- return;
-
- typename std::map<CellType, std::string>::const_iterator ctsItr =
- cellTypeStrs.find(rleAccumulator.first);
-
- assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
-
- os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
- if (rleAccumulator.second > 1)
- os << " colspan=" << rleAccumulator.second;
- os << "></td>\n";
- }
-
-
- void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
- raw_ostream &os) const {
-
- std::map<LiveState, std::string> lsStrs;
- lsStrs[Dead] = "l-n";
- lsStrs[Defined] = "l-d";
- lsStrs[Used] = "l-u";
- lsStrs[AliveReg] = "l-r";
- lsStrs[AliveStack] = "l-s";
-
- std::map<PressureState, std::string> psStrs;
- psStrs[Zero] = "p-z";
- psStrs[Low] = "p-l";
- psStrs[High] = "p-h";
-
- // Open the table...
-
- os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
- << indent + s(2) << "<tr>\n";
-
- // Render the header row...
-
- os << indent + s(4) << "<th>index</th>\n"
- << indent + s(4) << "<th>instr</th>\n";
-
- // Render class names if necessary...
- if (!ro.regClasses().empty()) {
- for (MFRenderingOptions::RegClassSet::const_iterator
- rcItr = ro.regClasses().begin(),
- rcEnd = ro.regClasses().end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- os << indent + s(4) << "<th>\n";
- renderVertical(indent + s(6), os, trc->getName());
- os << indent + s(4) << "</th>\n";
- }
- }
-
- // FIXME: Is there a nicer way to insert space between columns in HTML?
- if (!ro.regClasses().empty() && !ro.intervals().empty())
- os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
-
- // Render interval numbers if necessary...
- if (!ro.intervals().empty()) {
- for (MFRenderingOptions::IntervalSet::const_iterator
- liItr = ro.intervals().begin(),
- liEnd = ro.intervals().end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = *liItr;
- os << indent + s(4) << "<th>\n";
- renderVertical(indent + s(6), os, li->reg);
- os << indent + s(4) << "</th>\n";
- }
- }
-
- os << indent + s(2) << "</tr>\n";
-
- // End header row, start with the data rows...
-
- MachineInstr *mi = 0;
-
- // Data rows:
- for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
- i = i.getNextSlot()) {
-
- // Render the slot column.
- os << indent + s(2) << "<tr height=6ex>\n";
-
- // Render the code column.
- if (i.isBlock()) {
- MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
- mi = sis->getInstructionFromIndex(i);
-
- if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
- ro.renderEmptyIndexes()) {
- os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
- << indent + s(4) << "<td rowspan=4>\n";
-
- if (i == sis->getMBBStartIdx(mbb)) {
- os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
- } else if (mi != 0) {
- os << indent + s(6) << "&nbsp;&nbsp;";
- renderMachineInstr(os, mi);
- } else {
- // Empty interval - leave blank.
- }
- os << indent + s(4) << "</td>\n";
- } else {
- i = i.getDeadSlot(); // <- Will be incremented to the next index.
- continue;
- }
- }
-
- // Render the class columns.
- if (!ro.regClasses().empty()) {
- std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
- for (MFRenderingOptions::RegClassSet::const_iterator
- rcItr = ro.regClasses().begin(),
- rcEnd = ro.regClasses().end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- PressureState newPressure = getPressureStateAt(trc, i);
-
- if (newPressure == psRLEAccumulator.first) {
- ++psRLEAccumulator.second;
- } else {
- renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
- psRLEAccumulator.first = newPressure;
- psRLEAccumulator.second = 1;
- }
- }
- renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
- }
-
- // FIXME: Is there a nicer way to insert space between columns in HTML?
- if (!ro.regClasses().empty() && !ro.intervals().empty())
- os << indent + s(4) << "<td width=2em></td>\n";
-
- if (!ro.intervals().empty()) {
- std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
- for (MFRenderingOptions::IntervalSet::const_iterator
- liItr = ro.intervals().begin(),
- liEnd = ro.intervals().end();
- liItr != liEnd; ++liItr) {
- const LiveInterval *li = *liItr;
- LiveState newLiveness = getLiveStateAt(li, i);
-
- if (newLiveness == lsRLEAccumulator.first) {
- ++lsRLEAccumulator.second;
- } else {
- renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
- lsRLEAccumulator.first = newLiveness;
- lsRLEAccumulator.second = 1;
- }
- }
- renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
- }
- os << indent + s(2) << "</tr>\n";
- }
-
- os << indent << "</table>\n";
-
- if (!ro.regClasses().empty())
- renderPressureTableLegend(indent, os);
- }
-
- void RenderMachineFunction::renderFunctionPage(
- raw_ostream &os,
- const char * const renderContextStr) const {
- os << "<html>\n"
- << s(2) << "<head>\n"
- << s(4) << "<title>" << fqn << "</title>\n";
-
- insertCSS(s(4), os);
-
- os << s(2) << "<head>\n"
- << s(2) << "<body >\n";
-
- renderFunctionSummary(s(4), os, renderContextStr);
-
- os << s(4) << "<br/><br/><br/>\n";
-
- //renderLiveIntervalInfoTable(" ", os);
-
- os << s(4) << "<br/><br/><br/>\n";
-
- renderCodeTablePlusPI(s(4), os);
-
- os << s(2) << "</body>\n"
- << "</html>\n";
- }
-
- void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tri = mf->getTarget().getRegisterInfo();
- lis = &getAnalysis<LiveIntervals>();
- sis = &getAnalysis<SlotIndexes>();
-
- trei.setup(mf, mri, tri, lis);
- ro.setup(mf, tri, lis, this);
- spillIntervals.clear();
- spillFor.clear();
- useDefs.clear();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- return false;
- }
-
- void RenderMachineFunction::releaseMemory() {
- trei.clear();
- ro.clear();
- spillIntervals.clear();
- spillFor.clear();
- useDefs.clear();
- }
-
- void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
-
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- const MachineInstr *mi = &*rItr;
- if (mi->readsRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true));
- }
- if (mi->definesRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot());
- }
- }
- }
-
- void RenderMachineFunction::rememberSpills(
- const LiveInterval *li,
- const std::vector<LiveInterval*> &spills) {
-
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
- siEnd = spills.end();
- siItr != siEnd; ++siItr) {
- const LiveInterval *spill = *siItr;
- spillIntervals[li].insert(spill);
- spillFor[spill] = li;
- }
- }
-
- bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
- SpillForMap::const_iterator sfItr = spillFor.find(li);
- if (sfItr == spillFor.end())
- return false;
- return true;
- }
-
- void RenderMachineFunction::renderMachineFunction(
- const char *renderContextStr,
- const VirtRegMap *vrm,
- const char *renderSuffix) {
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- this->vrm = vrm;
- trei.reset();
-
- std::string rpFileName(mf->getFunction()->getName().str() +
- (renderSuffix ? renderSuffix : "") +
- outputFileSuffix);
-
- std::string errMsg;
- raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
-
- renderFunctionPage(outFile, renderContextStr);
-
- ro.resetRenderSpecificOptions();
- }
-
- std::string RenderMachineFunction::escapeChars(const std::string &s) const {
- return escapeChars(s.begin(), s.end());
- }
-
-}
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
deleted file mode 100644
index 8571992..0000000
--- a/lib/CodeGen/RenderMachineFunction.h
+++ /dev/null
@@ -1,338 +0,0 @@
-//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
-#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
-
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- class MachineInstr;
- class MachineRegisterInfo;
- class RenderMachineFunction;
- class TargetRegisterClass;
- class TargetRegisterInfo;
- class VirtRegMap;
- class raw_ostream;
-
- /// \brief Helper class to process rendering options. Tries to be as lazy as
- /// possible.
- class MFRenderingOptions {
- public:
-
- struct RegClassComp {
- bool operator()(const TargetRegisterClass *trc1,
- const TargetRegisterClass *trc2) const {
- std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
- return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
- trc2Name.begin(), trc2Name.end());
- }
- };
-
- typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
-
- struct IntervalComp {
- bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
- return li1->reg < li2->reg;
- }
- };
-
- typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
-
- /// Initialise the rendering options.
- void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
- LiveIntervals *lis, const RenderMachineFunction *rmf);
-
- /// Clear translations of options to the current function.
- void clear();
-
- /// Reset any options computed for this specific rendering.
- void resetRenderSpecificOptions();
-
- /// Should we render the current function.
- bool shouldRenderCurrentMachineFunction() const;
-
- /// Return the set of register classes to render pressure for.
- const RegClassSet& regClasses() const;
-
- /// Return the set of live intervals to render liveness for.
- const IntervalSet& intervals() const;
-
- /// Render indexes which are not associated with instructions / MBB starts.
- bool renderEmptyIndexes() const;
-
- /// Return whether or not to render using SVG for fancy vertical text.
- bool fancyVerticals() const;
-
- private:
-
- static bool renderingOptionsProcessed;
- static std::set<std::string> mfNamesToRender;
- static bool renderAllMFs;
-
- static std::set<std::string> classNamesToRender;
- static bool renderAllClasses;
-
-
- static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
- typedef enum { ExplicitOnly = 0,
- AllPhys = 1,
- VirtNoSpills = 2,
- VirtSpills = 4,
- AllVirt = 6,
- All = 7 }
- IntervalTypesToRender;
- static unsigned intervalTypesToRender;
-
- template <typename OutputItr>
- static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
-
- static void processOptions();
-
- static void processFuncNames();
- static void processRegClassNames();
- static void processIntervalNumbers();
-
- static void processIntervalRange(const std::string &intervalRangeStr);
-
- MachineFunction *mf;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
- const RenderMachineFunction *rmf;
-
- mutable bool regClassesTranslatedToCurrentFunction;
- mutable RegClassSet regClassSet;
-
- mutable bool intervalsTranslatedToCurrentFunction;
- mutable IntervalSet intervalSet;
-
- void translateRegClassNamesToCurrentFunction() const;
-
- void translateIntervalNumbersToCurrentFunction() const;
- };
-
- /// \brief Provide extra information about the physical and virtual registers
- /// in the function being compiled.
- class TargetRegisterExtraInfo {
- public:
- TargetRegisterExtraInfo();
-
- /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
- /// sources of information.
- void setup(MachineFunction *mf, MachineRegisterInfo *mri,
- const TargetRegisterInfo *tri, LiveIntervals *lis);
-
- /// \brief Recompute tables for changed function.
- void reset();
-
- /// \brief Free all tables in TargetRegisterExtraInfo.
- void clear();
-
- /// \brief Maximum number of registers from trc which alias reg.
- unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
-
- /// \brief Returns the number of allocable registers in trc.
- unsigned getCapacity(const TargetRegisterClass *trc) const;
-
- /// \brief Return the number of registers of class trc that may be
- /// needed at slot i.
- unsigned getPressureAtSlot(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- /// \brief Return true if the number of registers of type trc that may be
- /// needed at slot i is greater than the capacity of trc.
- bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- private:
-
- MachineFunction *mf;
- MachineRegisterInfo *mri;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
-
- typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
- typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
- VRWorstMap vrWorst;
-
- typedef std::map<unsigned, WorstMapLine> PRWorstMap;
- PRWorstMap prWorst;
-
- typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
- CapacityMap capacityMap;
-
- typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
- typedef std::map<SlotIndex, PressureMapLine> PressureMap;
- PressureMap pressureMap;
-
- bool mapsPopulated;
-
- /// \brief Initialise the 'worst' table.
- void initWorst();
-
- /// \brief Initialise the 'capacity' table.
- void initCapacity();
-
- /// \brief Initialise/Reset the 'pressure' and live states tables.
- void resetPressureAndLiveStates();
- };
-
- /// \brief Render MachineFunction objects and related information to a HTML
- /// page.
- class RenderMachineFunction : public MachineFunctionPass {
- public:
- static char ID;
-
- RenderMachineFunction() : MachineFunctionPass(ID) {
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
- void rememberUseDefs(const LiveInterval *li);
-
- void rememberSpills(const LiveInterval *li,
- const std::vector<LiveInterval*> &spills);
-
- bool isSpill(const LiveInterval *li) const;
-
- /// \brief Render this machine function to HTML.
- ///
- /// @param renderContextStr This parameter will be included in the top of
- /// the html file to explain where (in the
- /// codegen pipeline) this function was rendered
- /// from. Set it to something like
- /// "Pre-register-allocation".
- /// @param vrm If non-null the VRM will be queried to determine
- /// whether a virtual register was allocated to a
- /// physical register or spilled.
- /// @param renderFilePrefix This string will be appended to the function
- /// name (before the output file suffix) to enable
- /// multiple renderings from the same function.
- void renderMachineFunction(const char *renderContextStr,
- const VirtRegMap *vrm = 0,
- const char *renderSuffix = 0);
-
- private:
- class Spacer;
- friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
-
- std::string fqn;
-
- MachineFunction *mf;
- MachineRegisterInfo *mri;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
- SlotIndexes *sis;
- const VirtRegMap *vrm;
-
- TargetRegisterExtraInfo trei;
- MFRenderingOptions ro;
-
-
-
- // Utilities.
- typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
- LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
-
- typedef enum { Zero, Low, High } PressureState;
- PressureState getPressureStateAt(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
- SpillIntervals;
- SpillIntervals spillIntervals;
-
- typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
- SpillForMap spillFor;
-
- typedef std::set<SlotIndex> SlotSet;
- typedef std::map<const LiveInterval*, SlotSet> UseDefs;
- UseDefs useDefs;
-
- // ---------- Rendering methods ----------
-
- /// For inserting spaces when pretty printing.
- class Spacer {
- public:
- explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
- Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
- void print(raw_ostream &os) const;
- private:
- unsigned ns;
- };
-
- Spacer s(unsigned ns) const;
-
- template <typename Iterator>
- std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
-
- /// \brief Render a machine instruction.
- void renderMachineInstr(raw_ostream &os,
- const MachineInstr *mi) const;
-
- /// \brief Render vertical text.
- template <typename T>
- void renderVertical(const Spacer &indent,
- raw_ostream &os,
- const T &t) const;
-
- /// \brief Insert CSS layout info.
- void insertCSS(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render a brief summary of the function (including rendering
- /// context).
- void renderFunctionSummary(const Spacer &indent,
- raw_ostream &os,
- const char * const renderContextStr) const;
-
- /// \brief Render a legend for the pressure table.
- void renderPressureTableLegend(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render a consecutive set of HTML cells of the same class using
- /// the colspan attribute for run-length encoding.
- template <typename CellType>
- void renderCellsWithRLE(
- const Spacer &indent, raw_ostream &os,
- const std::pair<CellType, unsigned> &rleAccumulator,
- const std::map<CellType, std::string> &cellTypeStrs) const;
-
- /// \brief Render code listing, potentially with register pressure
- /// and live intervals shown alongside.
- void renderCodeTablePlusPI(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render the HTML page representing the MachineFunction.
- void renderFunctionPage(raw_ostream &os,
- const char * const renderContextStr) const;
-
- std::string escapeChars(const std::string &s) const;
- };
-}
-
-#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 8fd6426..752f8e4 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -64,10 +64,27 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// specified node.
bool SUnit::addPred(const SDep &D) {
// If this node already has this dependence, don't add a redundant one.
- for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I)
- if (*I == D)
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor edge in PredSU.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
return false;
+ }
+ }
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
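The merge rule here: an edge that overlaps an existing one (same target and kind; latency deliberately ignored) is never duplicated, but may widen the existing latency. A toy model of that contract, not the SDep API itself:

#include <algorithm>
#include <vector>

struct Edge {
  int To;       // target node id
  int Kind;     // dependence kind
  unsigned Lat; // latency
  // Equality that ignores latency, mirroring what SDep::overlaps checks.
  bool overlaps(const Edge &O) const { return To == O.To && Kind == O.Kind; }
};

// Returns false (nothing inserted) when an equivalent edge exists, but
// still widens its latency - the contract of the patched SUnit::addPred.
static bool addEdge(std::vector<Edge> &Edges, const Edge &E) {
  for (std::vector<Edge>::iterator I = Edges.begin(), End = Edges.end();
       I != End; ++I) {
    if (I->overlaps(E)) {
      I->Lat = std::max(I->Lat, E.Lat);
      return false;
    }
  }
  Edges.push_back(E);
  return true;
}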
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d46eb89..110f478 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,17 +21,24 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable use of AA during MI GAD construction"));
+
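As a usage note: since this is a cl::opt, the new code path can be exercised from the llc command line with -enable-aa-sched-mi; the option is cl::Hidden, so it is accepted but not advertised in the help listing.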
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt,
@@ -40,7 +47,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
- LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ LoopRegs(MDT), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -126,7 +133,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch())
@@ -134,7 +142,8 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
}
void ScheduleDAGInstrs::finishBlock() {
- // Nothing to do.
+ // Subclasses should no longer refer to the old block.
+ BB = 0;
}
/// Initialize the map with the number of registers.
@@ -159,7 +168,7 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned endcount) {
- BB = bb;
+ assert(bb == BB && "startBlock should set BB");
RegionBegin = begin;
RegionEnd = end;
EndIndex = endcount;
@@ -232,7 +241,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
unsigned DataLatency = SU->Latency;
- for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
std::vector<SUnit*> &UseList = Uses[*Alias];
@@ -261,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
// Adjust the dependence latency using operand def/use
// information (if any), and then allow the target to
// perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ SDep dep(SU, SDep::Data, LDataLatency, *Alias);
if (!UnitLatencies) {
- computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+ dep.setLatency(Latency);
+
+ ST.adjustSchedDependency(SU, UseSU, dep);
}
UseSU->addPred(dep);
}
@@ -285,7 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
std::vector<SUnit *> &DefList = Defs[*Alias];
@@ -400,8 +413,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// SSA defs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
- if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
- return;
+ //
+ // FIXME: This optimization is disabled pending PR13112.
+ //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+ // return;
// Add output dependence to the next nearest def of this vreg.
//
@@ -410,7 +425,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
- VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI == VRegDefs.end())
VRegDefs.insert(VReg2SUnit(Reg, SU));
else {
@@ -436,10 +451,11 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
- SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
- LiveInterval *LI = &LIS->getInterval(Reg);
- VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ VNInfo *VNI = LRQ.valueIn();
+
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ assert(VNI && "No value to read by operand");
MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
// Phis and other noninstructions (after coalescing) have a NULL Def.
if (Def) {
@@ -449,11 +465,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Create a data dependence.
//
// TODO: Handle "special" address latencies cleanly.
- const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
if (!UnitLatencies) {
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
- computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ dep.setLatency(Latency);
+
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
}
@@ -462,11 +480,217 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
// Add antidependence to the following def of the vreg it uses.
- VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
+ return true;
+ return false;
+}
+
+// This MI might have either incomplete info, or be known to be unsafe
+// to deal with (i.e. a volatile object).
+static inline bool isUnsafeMemoryObject(MachineInstr *MI,
+ const MachineFrameInfo *MFI) {
+ if (!MI || MI->memoperands_empty())
+ return true;
+ // We purposefully do not check for hasOneMemOperand() here
+ // in the hope of triggering an assert downstream that will
+ // flush out the unfinished implementation.
+ if ((*MI->memoperands_begin())->isVolatile() ||
+ MI->hasUnmodeledSideEffects())
+ return true;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return true;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
+ return true;
+
+ return false;
+}
+
+/// Returns true if the two MIs need a chain edge between them.
+/// Even if neither is a memory operation, we may still need
+/// chain deps between them. The real question is: could these
+/// two MIs be reordered during scheduling from a memory dependency
+/// point of view?
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ MachineInstr *MIa,
+ MachineInstr *MIb) {
+ // Cover a trivial case - no edge is needed from a node to itself.
+ if (MIa == MIb)
+ return false;
+
+ if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ return true;
+
+ // If we are dealing with two "normal" loads, we do not need an edge
+ // between them - they could be reordered.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
+ // Up to this point the analysis is generic. From here on we do need AA.
+ if (!AA)
+ return true;
+
+ MachineMemOperand *MMOa = *MIa->memoperands_begin();
+ MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ llvm_unreachable("Multiple memory operands.");
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasAnalysis::AliasResult AAResult = AA->alias(
+ AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+ MMOa->getTBAAInfo()),
+ AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+ MMOb->getTBAAInfo()));
+
+ return (AAResult != AliasAnalysis::NoAlias);
+}
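
To make the overlap arithmetic above concrete, here is a minimal standalone sketch; the (offset, size) pairs are illustrative stand-ins for the two MachineMemOperands, not values from any target:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // MMOa: 4 bytes at offset 8; MMOb: 8 bytes at offset 0 (illustrative).
  int64_t OffA = 8, SizeA = 4;
  int64_t OffB = 0, SizeB = 8;
  assert(OffA >= 0 && OffB >= 0);              // offsets assumed non-negative

  int64_t MinOffset = std::min(OffA, OffB);    // 0
  int64_t Overlapa = SizeA + OffA - MinOffset; // 12: window covering access A
  int64_t Overlapb = SizeB + OffB - MinOffset; // 8:  window covering access B

  // Both windows are anchored at the common MinOffset, so AA can answer the
  // trivial overlap question with a conservative range query per location.
  std::printf("MinOffset=%lld Overlapa=%lld Overlapb=%lld\n",
              (long long)MinOffset, (long long)Overlapa, (long long)Overlapb);
  return 0;
}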
+
+/// This recursive function iterates over chain dependencies of SUb looking
+/// for the "latest" node that needs a chain edge to SUa.
+static unsigned
+iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSet<const SUnit*, 16> &Visited) {
+ if (!SUa || !SUb || SUb == ExitSU)
+ return *Depth;
+
+ // Remember visited nodes.
+ if (!Visited.insert(SUb))
+ return *Depth;
+ // If there is _some_ dependency already in place, do not
+ // descend any further.
+ // TODO: Need to make sure that if that dependency got eliminated or ignored
+ // for any reason in the future, we would not violate DAG topology.
+ // Currently it does not happen, but makes an implicit assumption about
+ // future implementation.
+ //
+ // Independently, if we encounter a node that is some sort of global
+ // object (like a call) we already have the full set of dependencies to it
+ // and we can stop descending.
+ if (SUa->isSucc(SUb) ||
+ isGlobalMemoryObject(AA, SUb->getInstr()))
+ return *Depth;
+
+ // If we do need an edge, or we have exceeded the depth budget,
+ // add that edge to the predecessor chain of SUb
+ // and stop descending.
+ if (*Depth > 200 ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ return *Depth;
+ }
+ // Track current depth.
+ (*Depth)++;
+ // Iterate over chain dependencies only.
+ for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ return *Depth;
+}
+
+/// This function assumes that the tail/leaf of the already constructed DAG
+/// lies "downward" from SU. It iterates downward and checks whether SU
+/// can alias any node dominated by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ unsigned LatencyToLoad) {
+ if (!SU)
+ return;
+
+ SmallPtrSet<const SUnit*, 16> Visited;
+ unsigned Depth = 0;
+
+ for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+ I != IE; ++I) {
+ if (SU == *I)
+ continue;
+ if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
+ (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ }
+ // Now go through all the chain successors and iterate from them.
+ // Keep track of visited nodes.
+ for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+ JE = (*I)->Succs.end(); J != JE; ++J)
+ if (J->isCtrl())
+ iterateChainSucc(AA, MFI, SU, J->getSUnit(),
+ ExitSU, &Depth, Visited);
+ }
+}
+
+/// Check whether two objects need a chain edge; if so, add it,
+/// otherwise remember the rejected SU.
+static inline
+void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb,
+ std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
+ // If this is a false dependency,
+ // do not add the edge, but remember the rejected node.
+ if (!EnableAASchedMI ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
+ SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
+ isNormalMemory));
+ else {
+ // Duplicate entries should be ignored.
+ RejectList.insert(SUb);
+ DEBUG(dbgs() << "\tReject chain dep between SU("
+ << SUa->NodeNum << ") and SU("
+ << SUb->NodeNum << ")\n");
+ }
+}
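
A minimal sketch of the reject/re-check protocol, with plain ints and sets standing in for SUnits: an edge that AA proves unnecessary is skipped but the target node is recorded, and adjustChainDeps later re-tests recorded nodes against each new memory instruction so a needed edge is not silently lost:

#include <cassert>
#include <set>
#include <utility>

int main() {
  std::set<int> RejectList;                   // nodes with no edge added (yet)
  std::set<std::pair<int, int> > Edges;
  auto addChainDep = [&](int SUa, int SUb, bool NeedsEdge) {
    if (NeedsEdge)
      Edges.insert(std::make_pair(SUa, SUb));
    else
      RejectList.insert(SUb);                 // duplicates are ignored
  };

  addChainDep(1, 2, false);   // AA proved no alias: skip edge, remember SU(2)
  addChainDep(1, 3, true);
  assert(Edges.count(std::make_pair(1, 3)) && RejectList.count(2));

  // Later, every rejected node is re-tested against a new memory SU(4),
  // so an edge that turns out to be needed is still added.
  for (int SU : RejectList)
    addChainDep(4, SU, true);
  assert(Edges.count(std::make_pair(4, 2)));
  return 0;
}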
+
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
@@ -502,7 +726,11 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
-void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+/// If RPTracker is non-null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+ RegPressureTracker *RPTracker) {
// Create an SUnit for each real instruction.
initSUnits();
@@ -518,6 +746,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// that are known not to alias
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
@@ -553,6 +782,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
PrevMI = MI;
continue;
}
+ if (RPTracker) {
+ RPTracker->recede();
+ assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+ }
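
For illustration, a tiny standalone model of the invariant that the assert above checks; MockTracker is a hypothetical stand-in, not the real RegPressureTracker API. One recede() per instruction of the bottom-up walk keeps the tracker's position in lockstep with the DAG builder:

#include <cassert>

struct MockTracker {                  // hypothetical stand-in, not the real API
  int Pos;
  explicit MockTracker(int End) : Pos(End) {}
  void recede() { --Pos; }            // move one instruction up the block
  int getPos() const { return Pos; }
};

int main() {
  const int NumInstrs = 5;
  MockTracker RPTracker(NumInstrs);   // starts at the region's bottom
  for (int MII = NumInstrs; MII > 0; --MII) { // bottom-up, like the DAG build
    RPTracker.recede();
    assert(RPTracker.getPos() == MII - 1 && "tracker out of sync with walk");
  }
  return 0;
}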
assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
@@ -587,11 +820,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information.
-#define STORE_LOAD_LATENCY 1
- unsigned TrueMemOrderLatency = 0;
- if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasVolatileMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
+ if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -603,36 +833,48 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
}
- NonAliasMemDefs.clear();
- NonAliasMemUses.clear();
// Add SU to the barrier chain.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
BarrierChain = SU;
+ // This is a barrier event that acts as a pivotal node in the DAG,
+ // so it is safe to clear the list of exposed nodes.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ RejectMemNodes.clear();
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
// fall-through
new_alias_chain:
// Chain all possibly aliasing memory references through SU.
- if (AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (AliasChain) {
+ unsigned ChainLatency = 0;
+ if (AliasChain->getInstr()->mayLoad())
+ ChainLatency = TrueMemOrderLatency;
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ ChainLatency);
+ }
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
- E = AliasMemDefs.end(); I != E; ++I) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- }
+ E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ TrueMemOrderLatency);
}
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
bool MayAlias = true;
- TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
@@ -642,8 +884,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
I->second = SU;
} else {
if (MayAlias)
@@ -658,20 +900,28 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
- /*Reg=*/0, /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ TrueMemOrderLatency, true);
J->second.clear();
}
if (MayAlias) {
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we should also check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
}
// Add dependence on barrier chain, if needed.
+ // There is no point in checking aliasing on a barrier event. Even if
+ // SU and the barrier _could_ be reordered, they should not be. In
+ // addition, we have lost all RejectMemNodes below the barrier.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else {
@@ -688,7 +938,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
/*isArtificial=*/true));
} else if (MI->mayLoad()) {
bool MayAlias = true;
- TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
@@ -700,8 +949,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
if (MayAlias)
AliasMemUses[V].push_back(SU);
else
@@ -711,15 +959,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// potentially aliasing stores.
for (std::map<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
}
-
+ if (MayAlias)
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
@@ -735,8 +984,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node.
- if (!InstrItins || InstrItins->isEmpty()) {
+ // Compute the latency for the node. We only provide a default for missing
+ // itineraries. Empty itineraries still have latency properties.
+ if (!InstrItins) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
@@ -748,63 +998,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
}
}
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const {
- if (!InstrItins || InstrItins->isEmpty())
- return;
-
+unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
+ const SDep& dep,
+ bool FindMin) const {
// For a data dependency with a known register...
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
- return;
-
- const unsigned Reg = dep.getReg();
-
- // ... find the definition of the register in the defining
- // instruction
- MachineInstr *DefMI = Def->getInstr();
- int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1) {
- const MachineOperand &MO = DefMI->getOperand(DefIdx);
- if (MO.isReg() && MO.isImplicit() &&
- DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
- // This is an implicit def, getOperandLatency() won't return the correct
- // latency. e.g.
- // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
- // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
- // What we want is to compute latency between def of %D6/%D7 and use of
- // %Q3 instead.
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
- if (DefMI->getOperand(Op2).isReg())
- DefIdx = Op2;
- }
- MachineInstr *UseMI = Use->getInstr();
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
- if (UseMI) {
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
- UseMI, i);
- Latency = std::max(Latency, UseCycle);
- }
- } else {
- // UseMI is null, then it must be a scheduling barrier.
- if (!InstrItins || InstrItins->isEmpty())
- return;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
- }
+ return 1;
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
+ Use->getInstr(), dep.getReg(), FindMin);
}
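
A sketch of the new caller contract, with hypothetical stand-ins for SDep and the itinerary lookup: the function now returns a latency (defaulting to 1 for edges with no known def/use register) instead of mutating the SDep in place, so callers apply the result themselves:

#include <cassert>

enum DepKind { Data, Order };
struct MockDep {                      // hypothetical stand-in for SDep
  DepKind Kind;
  unsigned Reg;
  unsigned Latency;
};

// Mirrors the new shape: return a latency instead of mutating the dep.
unsigned computeOperandLatencySketch(const MockDep &Dep) {
  if (Dep.Kind != Data || Dep.Reg == 0)
    return 1;                         // default for edges without a known reg
  return 3;                           // pretend the itinerary answered 3 cycles
}

int main() {
  MockDep D = {Data, /*Reg=*/42, /*Latency=*/0};
  D.Latency = computeOperandLatencySketch(D);  // the caller applies the result
  assert(D.Latency == 3);

  MockDep O = {Order, /*Reg=*/0, /*Latency=*/0};
  assert(computeOperandLatencySketch(O) == 1);
  return 0;
}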
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 3d22035..e675366 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -39,13 +39,11 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
DebugType = ParentDebugType;
#endif
- // Determine the maximum depth of any itinerary. This determines the
- // depth of the scoreboard. We always make the scoreboard at least 1
- // cycle deep to avoid dealing with the boundary condition.
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
unsigned ScoreboardDepth = 1;
if (ItinData && !ItinData->isEmpty()) {
- IssueWidth = ItinData->IssueWidth;
-
for (unsigned idx = 0; ; ++idx) {
if (ItinData->isEndMarker(idx))
break;
@@ -63,16 +61,26 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
// Find the next power-of-2 >= ItinDepth
while (ItinDepth > ScoreboardDepth) {
ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
}
}
- MaxLookAhead = ScoreboardDepth;
}
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
- << ScoreboardDepth << '\n');
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel->IssueWidth;
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
}
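
A standalone check of the depth computation above: the scoreboard depth is the smallest power of two at least as large as the deepest itinerary stage, and MaxLookAhead is only set when the loop body runs, so an itinerary with no stages leaves it at 0 and the recognizer stays disabled. Values are illustrative:

#include <cassert>

int main() {
  // Deepest stage across all itineraries is 5: round up to 8.
  unsigned ItinDepth = 5;
  unsigned ScoreboardDepth = 1;
  unsigned MaxLookAhead = 0;
  while (ItinDepth > ScoreboardDepth) {
    ScoreboardDepth *= 2;
    MaxLookAhead = ScoreboardDepth;   // only set once a nonzero depth is seen
  }
  assert(ScoreboardDepth == 8 && MaxLookAhead == 8);

  // With no stages the loop never runs; MaxLookAhead stays 0 and the
  // recognizer reports itself disabled.
  unsigned EmptyDepth = 0, SB = 1, MLA = 0;
  while (EmptyDepth > SB) { SB *= 2; MLA = SB; }
  assert(MLA == 0);
  return 0;
}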
void ScoreboardHazardRecognizer::Reset() {
@@ -151,7 +159,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
if (!freeUnits) {
- DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
DEBUG(DAG->dumpNode(SU));
return Hazard;
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index a6bdc3b..75e8167 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -23,3 +23,5 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
+
+add_dependencies(LLVMSelectionDAG intrinsics_gen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0914c66..747bc44 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -215,6 +215,7 @@ namespace {
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
@@ -328,15 +329,12 @@ namespace {
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
- explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+ explicit WorkListRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
DC.removeFromWorkList(N);
}
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
};
}
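
The constructor change above reflects the new DAGUpdateListener scheme: listeners register themselves with the DAG on construction and unregister on destruction, so ReplaceAllUsesWith call sites no longer thread a listener argument through. A minimal standalone sketch of that RAII pattern, using hypothetical Mock types rather than the real SelectionDAG API:

#include <cassert>
#include <vector>

struct MockDAG;
struct MockListener {            // stand-in for SelectionDAG::DAGUpdateListener
  MockDAG &DAG;
  explicit MockListener(MockDAG &D);
  ~MockListener();
  virtual void NodeDeleted(int N) {}
};

struct MockDAG {
  std::vector<MockListener *> Listeners;
  void notifyDeleted(int N) {
    for (MockListener *L : Listeners) L->NodeDeleted(N);
  }
};

MockListener::MockListener(MockDAG &D) : DAG(D) { D.Listeners.push_back(this); }
MockListener::~MockListener() { DAG.Listeners.pop_back(); }

struct Remover : MockListener {  // analogous to WorkListRemover
  int LastDeleted = -1;
  explicit Remover(MockDAG &D) : MockListener(D) {}
  void NodeDeleted(int N) override { LastDeleted = N; }
};

int main() {
  MockDAG DAG;
  {
    Remover DeadNodes(DAG);      // registers itself on construction
    DAG.notifyDeleted(7);        // no listener argument needed at call sites
    assert(DeadNodes.LastDeleted == 7);
  }                              // destructor unregisters
  assert(DAG.Listeners.empty());
  return 0;
}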
@@ -619,8 +617,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
-
+ DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
@@ -650,7 +647,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorkList(TLO.New.getNode());
@@ -707,9 +704,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
removeFromWorkList(Load);
DAG.DeleteNode(Load);
AddToWorkList(Trunc.getNode());
@@ -961,8 +957,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
removeFromWorkList(N);
DAG.DeleteNode(N);
AddToWorkList(Result.getNode());
@@ -1047,12 +1043,12 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
- DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, &OpV);
}
// Push the new node and any users onto the worklist
@@ -1131,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
@@ -1325,10 +1322,12 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -1640,7 +1639,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
- N1.getOperand(0));
+ N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
@@ -2341,7 +2340,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeVectorOps) {
+ && Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
@@ -2528,7 +2527,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
- CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
}
// Fold the AND away, taking care not to fold to the old load node if we
@@ -2710,6 +2716,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
return SDValue();
}
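
A standalone arithmetic check of the (and (add x, c1), (lshr y, c2)) fold added above, with illustrative values rather than any particular target's immediate rules: the AND with (lshr y, c2) clears the top c2 bits of the result anyway, so those bits of the add constant are free, and setting them can turn an awkward immediate into a small negative one:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C2 = 16;
  const uint64_t Mask = ~0ULL << (64 - C2);   // top 16 bits set
  uint64_t X = 0x0123456789ABCDEFULL;
  uint64_t Y = 0xFEDCBA9876543210ULL;
  uint64_t ADDC = 0x0000FFFFFFFFFFF0ULL;      // awkward add immediate
  uint64_t NewC = ADDC | Mask;                // 0xFFFFFFFFFFFFFFF0 == -16

  // The (lshr y, 16) operand has its top 16 bits clear, so the AND discards
  // the top 16 bits of the sum; changing them in the constant is harmless.
  uint64_t Before = (X + ADDC) & (Y >> C2);
  uint64_t After = (X + NewC) & (Y >> C2);
  assert(Before == After);
  return 0;
}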
@@ -4526,8 +4560,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
N0.getValueType().getScalarType());
@@ -5012,6 +5048,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
@@ -5041,8 +5081,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
@@ -5225,7 +5264,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-
+ EVT IndexTy = N0->getOperand(1).getValueType();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
@@ -5233,7 +5272,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
N->getDebugLoc(), TrTy, V,
- DAG.getConstant(Index, MVT::i32));
+ DAG.getConstant(Index, IndexTy));
}
}
@@ -5607,7 +5646,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (FoldedVOp.getNode()) return FoldedVOp;
}
- // fold (fadd c1, c2) -> (fadd c1, c2)
+ // fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
@@ -5636,6 +5675,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegal(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
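
A small numeric illustration of why the FADD -> FMA combines above are gated behind AllowFPOpFusion/UnsafeFPMath: fma rounds once while mul-then-add rounds twice, so on IEEE-754 doubles the two can disagree in the low bits:

#include <cmath>
#include <cstdio>

int main() {
  double eps = std::ldexp(1.0, -27);   // 2^-27
  double x = 1.0 + eps, y = 1.0 + eps, z = -1.0;

  double mul_add = x * y + z;          // product rounded, then sum rounded
  double fused = std::fma(x, y, z);    // x*y+z with a single rounding

  // fused keeps the eps*eps term (2^-54) that the two-step form loses.
  std::printf("mul+add = %.17g\nfma     = %.17g\n", mul_add, fused);
  return 0;
}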
@@ -5673,9 +5732,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
+ // (fsub x, x) -> 0.0 &
// (fsub x, (fadd x, y)) -> (fneg y) &
// (fsub x, (fadd y, x)) -> (fneg y)
if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, VT);
+
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
@@ -5689,6 +5752,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegal(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
@@ -5720,6 +5806,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath &&
ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
@@ -5753,6 +5842,26 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5893,6 +6002,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
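
A standalone check of the -1.0 constant in the sint_to_fp fold above: an i1 "true" read as a signed 1-bit two's-complement value is -1, so sint_to_fp maps true to -1.0 and false to 0.0, matching the select_cc arms:

#include <cassert>
#include <cstdint>

// Interpret the low `bits` bits of x as a signed two's-complement value.
int64_t signExtend(uint64_t x, unsigned bits) {
  uint64_t m = 1ULL << (bits - 1);
  if (bits < 64)
    x &= (1ULL << bits) - 1;
  return (int64_t)((x ^ m) - m);
}

int main() {
  assert(signExtend(1, 1) == -1);            // i1 "true", read signed, is -1
  assert(signExtend(0, 1) == 0);
  assert((double)signExtend(1, 1) == -1.0);  // hence the -1.0 select_cc arm
  return 0;
}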
@@ -5918,6 +6059,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -6185,7 +6345,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
// Replace the uses of SRL with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -6214,7 +6374,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
removeFromWorkList(TheXor);
DAG.DeleteNode(TheXor);
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
@@ -6240,7 +6400,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
@@ -6431,21 +6591,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
DAG.DeleteNode(Ptr.getNode());
@@ -6559,13 +6715,10 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
@@ -6573,8 +6726,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Op);
DAG.DeleteNode(Op);
return true;
@@ -6609,7 +6761,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
if (N->use_empty()) {
removeFromWorkList(N);
@@ -6629,11 +6781,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -6955,8 +7106,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewVal.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
@@ -7013,8 +7163,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewST.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
@@ -7058,7 +7207,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Tmp;
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP type");
- case MVT::f80: // We don't do this for these yet.
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
@@ -7323,8 +7473,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
OrigElt -= NumElem;
}
+ EVT IndexTy = N->getOperand(1).getValueType();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
- InVec, DAG.getConstant(OrigElt, MVT::i32));
+ InVec, DAG.getConstant(OrigElt, IndexTy));
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -7472,7 +7623,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
@@ -7489,6 +7640,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7496,12 +7652,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
- bool AllUndef = true;
+
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
- AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
@@ -7529,9 +7684,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
AllAnyExt &= AnyExt;
}
- if (AllUndef)
- return DAG.getUNDEF(VT);
-
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
@@ -7707,6 +7859,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (N->getNumOperands() == 1)
return N->getOperand(0);
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0c1ac69..e5ea6e6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,6 +40,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -51,7 +52,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -484,7 +484,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
// N = N + Offset
- TotalOffs +=
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
@@ -573,7 +573,10 @@ bool FastISel::SelectCall(const User *I) {
// At -O0 we don't care about the lifetime intrinsics.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
return true;
+
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
@@ -642,7 +645,7 @@ bool FastISel::SelectCall(const User *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addCImm(CI).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
- else
+ else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(CI->getZExtValue()).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
@@ -792,7 +795,7 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = DebugLoc();
return true;
}
- // Remove dead code. However, ignore call instructions since we've flushed
+ // Remove dead code. However, ignore call instructions since we've flushed
// the local value map and recomputed the insert point.
if (!isa<CallInst>(I)) {
recomputeInsertPt();
@@ -1306,6 +1309,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
@@ -1345,6 +1372,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0, getKillRegState(Op0IsKill), Idx);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8dde919..3e18ea7 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -15,13 +15,13 @@
#define DEBUG_TYPE "function-lowering-info"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 1467d88..936c126 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
return N;
}
-/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// countOperands - The inputs to target nodes have any actual inputs first,
/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
-unsigned InstrEmitter::CountOperands(SDNode *Node) {
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ for (unsigned I = N; I; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
return N;
}
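
A sketch of the operand layout this counting assumes, using a toy operand list in place of real SDNodes: [real operands][trailing physreg copies][chain][glue]. The backward walk counts the trailing physreg run into NumImpUses:

#include <cassert>
#include <vector>

enum Kind { Real, PhysReg, Chain, Glue };

int main() {
  std::vector<Kind> Ops = {Real, Real, PhysReg, PhysReg, Chain, Glue};

  unsigned N = Ops.size();
  if (N && Ops[N - 1] == Glue) --N;    // drop glue
  if (N && Ops[N - 1] == Chain) --N;   // drop chain
  unsigned NumImpUses = 0;
  for (unsigned I = N; I; --I) {
    if (Ops[I - 1] == PhysReg)
      continue;                        // still in the trailing physreg run
    NumImpUses = N - I;                // length of the run walked so far
    break;
  }
  assert(N == 4 && NumImpUses == 2);
  return 0;
}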
@@ -114,8 +129,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (User->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(User->getMachineOpcode());
const TargetRegisterClass *RC = 0;
- if (i+II.getNumDefs() < II.getNumOperands())
- RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
if (!UseRC)
UseRC = RC;
else if (RC) {
@@ -196,7 +213,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
unsigned VRBase = 0;
- const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -293,7 +311,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
if (II) {
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
- DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
@@ -334,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
-/// operand number (in the II) that we are adding. IIOpNum and II are used for
-/// assertions only.
+/// operand number (in the II) that we are adding.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
@@ -350,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
const ConstantFP *CFP = F->getConstantFPValue();
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
@@ -458,7 +479,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
- SubIdx == DefSubIdx) {
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
// Optimize these:
// r1025 = s/zext r1024, 4
// r1026 = extract_subreg r1025, 4
@@ -467,6 +489,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
} else {
// VReg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
@@ -548,7 +571,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
// Create the new VReg in the destination class and emit a copy.
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg).addReg(VReg);
@@ -566,7 +590,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
- unsigned NewVReg = MRI->createVirtualRegister(RC);
+ unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
unsigned NumOps = Node->getNumOperands();
@@ -691,7 +715,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
- unsigned NodeOperands = CountOperands(Node);
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands = countOperands(Node, NumImpUses);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
@@ -700,7 +725,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Too few operands for a variadic node!");
else
assert(NumMIOperands >= II.getNumOperands() &&
- NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
"#operands for dag node doesn't match .td file!");
#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c081f38..9eddee9 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -105,12 +105,6 @@ public:
/// (which do not go into the machine instrs.)
static unsigned CountResults(SDNode *Node);
- /// CountOperands - The inputs to target nodes have any actual inputs first,
- /// followed by an optional chain operand, then flag operands. Compute
- /// the number of actual operands that will go into the resulting
- /// MachineInstr.
- static unsigned CountOperands(SDNode *Node);
-
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a96a997..b0776af 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,7 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -20,10 +24,6 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -70,6 +70,9 @@ private:
SDValue OptimizeFloatStore(StoreSDNode *ST);
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
@@ -150,21 +153,21 @@ public:
// Node replacement helpers
void ReplacedNode(SDNode *N) {
if (N->use_empty()) {
- DAG.RemoveDeadNode(N, this);
+ DAG.RemoveDeadNode(N);
} else {
ForgetNode(N);
}
}
void ReplaceNode(SDNode *Old, SDNode *New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old);
}
void ReplaceNode(SDValue Old, SDValue New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old.getNode());
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old);
}
};
@@ -203,7 +206,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
}
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
- : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ : SelectionDAG::DAGUpdateListener(dag),
+ TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
DAG(dag) {
}
@@ -638,9 +642,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// probably means that we need to integrate dag combiner and legalizer
// together.
// We generally can't do this one for long doubles.
- SDValue Tmp1 = ST->getChain();
- SDValue Tmp2 = ST->getBasePtr();
- SDValue Tmp3;
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -648,19 +651,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
TLI.isTypeLegal(MVT::i32)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
@@ -673,11 +676,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
@@ -688,14 +691,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
return SDValue(0, 0);
}
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ DebugLoc dl = Node->getDebugLoc();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ EVT VT = Value.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Value = DAG.getNode(ISD::BITCAST, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
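
Two of the branches above reward a concrete view: the byte-size promotion zero-extends in-register so the padding bits stored are provably zero (the `TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)` case), and a non-power-of-two width, detected by `StWidth & (StWidth - 1)`, is split into a round-width store plus a store of the shifted-down remainder. A hedged scalar sketch of the little-endian i24 case:

```cpp
#include <cstdint>

// i1 promotion: mask so the upper stored bits are provably zero.
uint8_t promoteI1ForStore(uint8_t X) { return X & 1; } // (and X, 1)

// Little-endian i24 truncating store split, as above: RoundWidth = 16
// (1 << Log2_32(24)), ExtraWidth = 8, IncrementSize = 2 bytes.
void truncStore24LE(uint32_t Value, uint8_t *Mem) {
  uint16_t Lo = static_cast<uint16_t>(Value);      // bottom RoundWidth bits
  uint8_t  Hi = static_cast<uint8_t>(Value >> 16); // srl X, RoundWidth
  Mem[0] = static_cast<uint8_t>(Lo);
  Mem[1] = static_cast<uint8_t>(Lo >> 8);
  Mem[2] = Hi;                                     // store at Ptr + 2
}
```
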
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ DebugLoc dl = Node->getDebugLoc();
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ }
+}
+
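
The extending-load split is the mirror image: zero-extend-load the bottom RoundWidth bits, load the remaining ExtraWidth bits at the incremented pointer, shift them into place, and OR the halves together. The little-endian i24 case as a scalar sketch:

```cpp
#include <cstdint>

// Little-endian i24 extending load: ZEXTLOAD the low 16 bits, load the
// high 8 bits at +2, then (Hi << RoundWidth) | Lo, as in LegalizeLoadOps.
uint32_t extLoad24LE(const uint8_t *Mem) {
  uint32_t Lo = Mem[0] | (uint32_t(Mem[1]) << 8); // RoundWidth = 16 bits
  uint32_t Hi = Mem[2];                           // ExtraWidth = 8 bits
  return (Hi << 16) | Lo;                         // join hi and lo parts
}
```
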
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return;
- DebugLoc dl = Node->getDebugLoc();
-
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
TargetLowering::TypeLegal &&
@@ -708,9 +1145,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- bool isCustom = false;
-
// Figure out the correct action; the way to query this varies by opcode
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
@@ -816,9 +1250,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(Node->getOperand(i));
+ SDNode *NewNode = Node;
switch (Node->getOpcode()) {
default: break;
case ISD::SHL:
@@ -828,11 +1260,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector()) {
- SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ if (!Node->getOperand(1).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(1));
HandleSDNode Handle(SAO);
LegalizeOp(SAO.getNode());
- Ops[1] = Handle.getValue();
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Handle.getValue());
}
break;
case ISD::SRL_PARTS:
@@ -840,18 +1275,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector()) {
- SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ if (!Node->getOperand(2).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(2));
HandleSDNode Handle(SAO);
LegalizeOp(SAO.getNode());
- Ops[2] = Handle.getValue();
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Node->getOperand(1),
+ Handle.getValue());
}
break;
}
- SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
if (NewNode != Node) {
- DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ DAG.ReplaceAllUsesWith(Node, NewNode);
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
ReplacedNode(Node);
@@ -860,27 +1298,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
switch (Action) {
case TargetLowering::Legal:
return;
- case TargetLowering::Custom:
+ case TargetLowering::Custom: {
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode()) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
- ResultVals.push_back(Tmp1);
+ ResultVals.push_back(Res);
else
- ResultVals.push_back(Tmp1.getValue(i));
+ ResultVals.push_back(Res.getValue(i));
}
- if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
- DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ if (Res.getNode() != Node || Res.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data());
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
ReplacedNode(Node);
}
return;
}
-
+ }
// FALL THROUGH
case TargetLowering::Expand:
ExpandNode(Node);
@@ -904,428 +1342,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::CALLSEQ_END:
break;
case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LD->getChain(); // Legalize the chain.
- Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD) {
- EVT VT = Node->getValueType(0);
- Tmp3 = SDValue(Node, 0);
- Tmp4 = SDValue(Node, 1);
-
- switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp3, Tmp4);
- }
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Tmp3, DAG);
- if (Tmp1.getNode()) {
- Tmp3 = Tmp1;
- Tmp4 = Tmp1.getValue(1);
- }
- break;
- case TargetLowering::Promote: {
- // Only promote a load of vector type to another.
- assert(VT.isVector() && "Cannot promote this load!");
- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
- Tmp4 = Tmp1.getValue(1);
- break;
- }
- }
- if (Tmp4.getNode() != Node) {
- assert(Tmp3.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
- ReplacedNode(Node);
- }
- return;
- }
-
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = Result;
- Tmp2 = Ch;
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- } else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp2 = Ch;
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Tmp1 = SDValue(Node, 0);
- Tmp2 = SDValue(Node, 1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp3.getNode()) {
- Tmp1 = Tmp3;
- Tmp2 = Tmp3.getValue(1);
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp1, Tmp2);
- }
- }
- }
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp2 = Load.getValue(1);
- break;
- }
-
- assert(!SrcVT.isVector() &&
- "Vector Loads are handled in LegalizeVectorOps");
-
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = ValRes;
- Tmp2 = Result.getValue(1);
- break;
- }
- }
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- if (Tmp2.getNode() != Node) {
- assert(Tmp1.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
- ReplacedNode(Node);
- }
- break;
+ return LegalizeLoadOps(Node);
}
case ISD::STORE: {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = ST->getChain();
- Tmp2 = ST->getBasePtr();
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
-
- if (!ST->isTruncatingStore()) {
- if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- ReplaceNode(ST, OptStore);
- break;
- }
-
- {
- Tmp3 = ST->getValue();
- EVT VT = Tmp3.getValueType();
- switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node),
- DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode())
- ReplaceNode(SDValue(Node, 0), Tmp1);
- break;
- case TargetLowering::Promote: {
- assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BITCAST, dl,
- TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- break;
- }
- } else {
- Tmp3 = ST->getValue();
-
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
-
- if (StWidth != StVT.getStoreSizeInBits()) {
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
- Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- SDValue Result =
- DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
- // If not storing a power-of-2 number of bits, expand as two stores.
- assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Store size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
- // Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- RoundVT,
- isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- } else {
- // Big endian - avoid unaligned stores.
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
- // Store the top RoundWidth bits.
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- }
-
- // The order of the stores doesn't matter.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- ReplaceNode(SDValue(Node, 0), Result);
- } else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- ReplaceNode(SDValue(Node, 0),
- TLI.LowerOperation(SDValue(Node, 0), DAG));
- break;
- case TargetLowering::Expand:
- assert(!StVT.isVector() &&
- "Vector Stores are handled in LegalizeVectorOps");
-
- // TRUNCSTORE:i16 i32 -> STORE i16
- assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
- Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- }
- break;
+ return LegalizeStoreOps(Node);
}
}
}
@@ -1795,11 +1815,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
if (isTailCall)
InChain = TCChain;
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
@@ -1828,11 +1850,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- std::pair<SDValue,SDValue> CallInfo =
- TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo.first;
}
@@ -1860,11 +1884,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
}
@@ -1919,9 +1944,11 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
return TLI.getLibcallName(LC) != 0;
}
-/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// useDivRem - Only issue divrem libcall if both quotient and remainder are
/// needed.
-static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ // The other use might have been replaced with a divrem already.
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
unsigned OtherOpcode = 0;
if (isSigned)
OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
@@ -1935,7 +1962,7 @@ static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
SDNode *User = *UI;
if (User == Node)
continue;
- if (User->getOpcode() == OtherOpcode &&
+ if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
User->getOperand(0) == Op0 &&
User->getOperand(1) == Op1)
return true;
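
useDivRem exists because a combined divrem libcall computes both results for the price of one; it fires only when another node wants the matching quotient or remainder of the same operands (or an SDIVREM/UDIVREM already formed, per the new check). The motivation in plain C++, with std::div as the scalar analogue of the fused node:

```cpp
#include <cstdlib>
#include <iostream>

int main() {
  int A = 29, B = 4;
  // Two separate operations; on targets without a hardware divider each
  // would become its own libcall.
  int Q = A / B, R = A % B;
  // One combined operation, the analogue of the SDIVREM node formed when
  // useDivRem() returns true.
  std::div_t QR = std::div(A, B);
  std::cout << Q << " " << R << " == " << QR.quot << " " << QR.rem << "\n";
}
```
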
@@ -1992,11 +2019,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
TLI.getPointerTy());
DebugLoc dl = Node->getDebugLoc();
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
// Remainder is loaded back from the stack frame.
SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
@@ -2570,14 +2598,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
// FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> CallResult =
- TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
Results.push_back(CallResult.second);
break;
}
@@ -2647,13 +2678,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::TRAP: {
// If this operation is not supported, lower it to 'abort()' call
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> CallResult =
- TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
Results.push_back(CallResult.second);
break;
}
@@ -3059,7 +3093,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
"Don't know how to expand this subtraction!");
Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
- Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp2, DAG.getConstant(1, VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
}
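
The one-character fix above matters: the expansion computes a - b as a + (~b + 1), so the increment must apply to the freshly built XOR result (Tmp1), not to the stale Tmp2. The identity as runnable code:

```cpp
#include <cassert>
#include <cstdint>

// Expand subtraction as a + (~b + 1), the two's-complement identity used
// above (XOR with all-ones is bitwise NOT).
uint32_t expandSub(uint32_t A, uint32_t B) {
  uint32_t NotB = B ^ 0xFFFFFFFFu; // Tmp1 = XOR(b, all-ones)
  uint32_t NegB = NotB + 1;        // Tmp1 = ADD(Tmp1, 1)  <- the fix
  return A + NegB;                 // result = ADD(a, Tmp1)
}

int main() { assert(expandSub(7, 9) == static_cast<uint32_t>(-2)); }
```
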
@@ -3074,7 +3108,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- UseDivRem(Node, isSigned, false))) {
+ useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
@@ -3102,7 +3136,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDVTList VTs = DAG.getVTList(VT, VT);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- UseDivRem(Node, isSigned, true)))
+ useDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 95ddb1e..e8e968a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
unsigned NumElts = InVT.getVectorNumElements();
assert(NumElts == NVT.getVectorNumElements() &&
"Dst and Src must have the same number of elements");
- EVT EltVT = InVT.getScalarType();
assert(isPowerOf2_32(NumElts) &&
"Promoted vector type must be a power of two");
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts/2);
-
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(0));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(NumElts/2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
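
The rewritten TRUNCATE promotion now fetches the input's already-split halves via GetSplitVector rather than re-extracting subvectors, then truncates each half separately. A scalar sketch of truncating by halves (element widths here are illustrative):

```cpp
#include <cstdint>
#include <vector>

// Truncate a promoted vector by splitting it and truncating each half,
// mirroring GetSplitVector + two ISD::TRUNCATE nodes (i32 -> i16 here).
std::vector<uint16_t> truncateBySplit(const std::vector<uint32_t> &In) {
  std::vector<uint16_t> Out(In.size());
  size_t Half = In.size() / 2;              // NumElts is a power of two
  for (size_t I = 0; I < Half; ++I)         // TRUNCATE on EOp1
    Out[I] = static_cast<uint16_t>(In[I]);
  for (size_t I = Half; I < In.size(); ++I) // TRUNCATE on EOp2
    Out[I] = static_cast<uint16_t>(In[I]);
  return Out;
}
```
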
@@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
- DAG.getConstant(1, VT), RHS);
+ DAG.getConstant(1, VT), RHS);
SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
SDValue Overflow;
Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
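
For the code being re-indented here, the divide-based overflow check is worth spelling out: MUL wraps on overflow, and dividing it back by RHS exposes the loss; the SELECT swaps in 1 when RHS is zero so the UDIV is safe. A scalar sketch of the intent, with the zero case explicitly reporting no overflow:

```cpp
#include <cstdint>

// Divide-based unsigned multiply overflow check, as in ExpandIntRes_XMULO:
// the product wraps on overflow, and dividing it back exposes the loss.
bool umulOverflows(uint64_t LHS, uint64_t RHS, uint64_t &Mul) {
  Mul = LHS * RHS;         // MUL (wraps modulo 2^64 on overflow)
  if (RHS == 0)            // the SELECT's zero guard
    return false;          // 0 * x never overflows
  return Mul / RHS != LHS; // SETNE(UDIV(MUL, RHS), LHS)
}
```
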
@@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
- DAG.getConstant(0, PtrVT), Temp,
- MachinePointerInfo(), false, false, 0);
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2319,16 +2315,17 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Func, Args, DAG, dl);
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 439aa4d..39337ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -628,7 +628,8 @@ namespace {
public:
explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
SmallSetVector<SDNode*, 16> &nta)
- : DTL(dtl), NodesToAnalyze(nta) {}
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
@@ -680,7 +681,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
do {
- DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+ DAG.ReplaceAllUsesOfValueWith(From, To);
// The old node may still be present in a map like ExpandedIntegers or
// PromotedIntegers. Inform maps about the replacement.
@@ -709,7 +710,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SDValue NewVal(M, i);
if (M->getNodeId() == Processed)
RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
// OldVal may be a target of the ReplacedValues map which was marked
// NewNode to force reanalysis because it was updated. Ensure that
// anything that ReplacedValues mapped to OldVal will now be mapped
@@ -950,7 +951,7 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
if (i != ResNo)
ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
- return SDValue(N, ResNo);
+ return SDValue(N->getOperand(ResNo));
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -1054,12 +1055,14 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- std::pair<SDValue,SDValue> CallInfo =
- TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC),
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
return CallInfo.first;
}
@@ -1086,11 +1089,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e866445..94fc976 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -135,6 +135,8 @@ public:
ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
}
+ SelectionDAG &getDAG() const { return DAG; }
+
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
@@ -151,7 +153,7 @@ private:
/// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
/// node with the corresponding input operand, except for the result 'ResNo',
- /// which is returned.
+ /// for which the corresponding input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
@@ -509,10 +511,12 @@ private:
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
@@ -553,6 +557,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a8ff7c6..06f6bd6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
DebugLoc dl = N->getDebugLoc();
// Convert to a vector of the expanded element type, for example
@@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
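
The new guard handles an EXTRACT_VECTOR_ELT whose result type is wider than the vector's element type: any-extend the elements to the result width first, so the bitcast-to-expanded-pairs step sees elements of the size it expects. Elementwise, the idea looks like this (types here are illustrative):

```cpp
#include <cstdint>

// i64 result from a vector of i8: extend the element to the result width
// first (ANY_EXTEND; sign extension shown), then expand into i32 halves.
void extractWiderThanElt(const int8_t *Vec, unsigned Idx,
                         uint32_t &Lo, uint32_t &Hi) {
  uint64_t Wide = static_cast<uint64_t>(static_cast<int64_t>(Vec[Idx]));
  Lo = static_cast<uint32_t>(Wide);
  Hi = static_cast<uint32_t>(Wide >> 32);
}
```
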
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9fe4480..704f99b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -71,6 +71,9 @@ class VectorLegalizer {
// operands to a different type and bitcasting the result back to the
// original type.
SDValue PromoteVectorOp(SDValue Op);
+ // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+ // operand to the next size up.
+ SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
public:
bool Run();
@@ -231,9 +234,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
- // "Promote" the operation by bitcasting
- Result = PromoteVectorOp(Op);
- Changed = true;
+ switch (Op.getOpcode()) {
+ default:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ Result = PromoteVectorOpINT_TO_FP(Op);
+ Changed = true;
+ break;
+ }
break;
case TargetLowering::Legal: break;
case TargetLowering::Custom: {
@@ -293,6 +306,44 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ EVT VT = Op.getOperand(0).getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+
+ // Normal getTypeToPromoteTo() doesn't work here, as that will promote
+ // by widening the vector w/ the same element width and twice the number
+ // of elements. We want the other way around, the same number of elements,
+ // each twice the width.
+ //
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
+ assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
+ Operands.size());
+}
+
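
The promotion keeps the element count and doubles each element's width, sign- or zero-extending according to the conversion's signedness, then converts from the wider legal type. Per element, that is simply:

```cpp
#include <cstdint>

// Promote v*i16 -> v*i32 before converting to float, elementwise.
float sintToFpPromoted(int16_t X) {
  int32_t Wide = X;                // SIGN_EXTEND to twice the width
  return static_cast<float>(Wide); // SINT_TO_FP on the promoted type
}

float uintToFpPromoted(uint16_t X) {
  uint32_t Wide = X;               // ZERO_EXTEND to twice the width
  return static_cast<float>(Wide); // UINT_TO_FP on the promoted type
}
```
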
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5f23f01..4709202 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
- case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
@@ -115,6 +115,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SRL:
R = ScalarizeVecRes_BinOp(N);
break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -129,6 +132,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -141,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
NewVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
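
Scalarizing a BUILD_VECTOR can no longer just forward operand 0: the operand may have been promoted to a wider integer type, so the integer case truncates it back to the element type. Scalar sketch (widths illustrative):

```cpp
#include <cstdint>

// BUILD_VECTOR of <1 x i8> whose operand was promoted to i32: scalarizing
// must truncate the operand back to the requested element type.
uint8_t scalarizeBuildVector(uint32_t PromotedOp) {
  return static_cast<uint8_t>(PromotedOp); // ISD::TRUNCATE to EltVT
}
```
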
SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
@@ -436,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
-
+
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
@@ -448,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split the result of this operator!");
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
@@ -529,6 +551,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FREM:
SplitVecRes_BinOp(N, Lo, Hi);
break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -548,6 +573,22 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
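As a sanity check on the splitting logic, here is a standalone model of a <4 x float> fma split into two <2 x float> halves (toy array types; the real code splits SDValues and leaves the halves registered with the legalizer):

    #include <array>
    #include <cmath>

    using V2 = std::array<float, 2>;
    using V4 = std::array<float, 4>;

    static V2 FMA2(V2 A, V2 B, V2 C) {
      return {std::fma(A[0], B[0], C[0]), std::fma(A[1], B[1], C[1])};
    }

    // Mirrors SplitVecRes_TernaryOp: split each operand, emit the op on
    // each half, then conceptually concatenate Lo and Hi.
    static V4 SplitFMA(V4 A, V4 B, V4 C) {
      V2 Lo = FMA2({A[0], A[1]}, {B[0], B[1]}, {C[0], C[1]});
      V2 Hi = FMA2({A[2], A[3]}, {B[2], B[3]}, {C[2], C[3]});
      return {Lo[0], Lo[1], Hi[0], Hi[1]};
    }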
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
@@ -977,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split this operator's operand!");
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -1203,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
-
+
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
-
+
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
-
+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
-}
+}
@@ -1755,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getIntPtrConstant(j));
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -1816,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
DAG.getIntPtrConstant(0));
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
}
@@ -1832,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getIntPtrConstant(i));
Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -1936,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
Cond1 = GetWidenedVector(Cond1);
if (Cond1.getValueType() != CondWidenVT)
- Cond1 = ModifyToType(Cond1, CondWidenVT);
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
}
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -2202,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
ResVT, WideSETCC, DAG.getIntPtrConstant(0));
- return PromoteTargetBoolean(CC, N->getValueType(0));
+ return PromoteTargetBoolean(CC, N->getValueType(0));
}
@@ -2371,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- isVolatile,
- isNonTemporal, isInvariant,
- MinAlign(Align, Increment));
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2563,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ff0136e..c3794d5 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -50,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
const TargetMachine &tm = (*IS->MF).getTarget();
ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
- // This hard requirment could be relaxed, but for now
+ // This hard requirement could be relaxed, but for now
 // do not let it proceed.
assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
@@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
@@ -353,7 +353,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
}
/// Estimates change in reg pressure from this SU.
-/// It is acheived by trivial tracking of defined
+/// It is achieved by trivial tracking of defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function ignore
/// existing reg file sizes, and report raw def/use
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 24da432..b7ce48a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -441,19 +441,14 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
- LRegs.push_back(Reg);
- Added = true;
- }
- }
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI)) {
+ LRegs.push_back(*AI);
Added = true;
}
}
+ }
return Added;
}
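The rewrite above replaces a special case for Reg plus a separate alias loop with one iterator that can include the register itself. A toy model of that pattern (invented types; the real MCRegAliasIterator lives in MC and walks target register tables):

    #include <cstdio>
    #include <vector>

    struct ToyRegInfo {
      // Aliases[R] lists registers overlapping R, not counting R itself.
      std::vector<std::vector<unsigned>> Aliases;

      // One sequence covering R and its aliases, like
      // MCRegAliasIterator(Reg, TRI, /*IncludeSelf=*/true).
      std::vector<unsigned> RegAndAliases(unsigned R, bool IncludeSelf) const {
        std::vector<unsigned> Out;
        if (IncludeSelf) Out.push_back(R);
        Out.insert(Out.end(), Aliases[R].begin(), Aliases[R].end());
        return Out;
      }
    };

    int main() {
      ToyRegInfo TRI{{{}, {2}, {1}}};        // regs 1 and 2 overlap
      for (unsigned R : TRI.RegAndAliases(1, true))
        std::printf("check live-reg def for %u\n", R);  // visits 1, then 2
    }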
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 2cb5d37..bf0a437 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -266,7 +266,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI,
- unsigned &RegClass, unsigned &Cost) {
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
EVT VT = RegDefPos.GetValue();
// Special handling for untyped values. These values can only come from
@@ -285,7 +286,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned Idx = RegDefPos.GetIdx();
const MCInstrDesc Desc = TII->get(Opcode);
- const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
// better way to determine it.
@@ -852,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
/// After backtracking, the hazard checker needs to be restored to a state
-/// corresponding the the current cycle.
+/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
HazardRec->Reset();
@@ -1181,7 +1182,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1920,7 +1921,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
@@ -2034,7 +2035,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
continue;
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
RegPressure[RCId] += Cost;
break;
}
@@ -2049,7 +2050,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (SkipRegDefs > 0)
continue;
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
@@ -2330,22 +2331,21 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::ILP ||
right->SchedulingPref == Sched::ILP)) {
- if (DisableSchedCycles) {
+    // If neither instruction stalls (!LStall && !RStall) and the
+    // HazardRecognizer is enabled (grouping instructions by cycle), then
+    // height is already covered, so only depth matters. We also reach this
+    // point if both stall but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
}
- else {
- // If neither instruction stalls (!LStall && !RStall) then
- // its height is already covered so only its depth matters. We also reach
- // this if both stall but have the same height.
- int LDepth = left->getDepth() - LPenalty;
- int RDepth = right->getDepth() - RPenalty;
- if (LDepth != RDepth) {
- DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
- << ") depth " << LDepth << " vs SU (" << right->NodeNum
- << ") depth " << RDepth << "\n");
- return LDepth < RDepth ? 1 : -1;
- }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
}
if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
@@ -2363,7 +2363,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
#ifndef NDEBUG
- const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
#endif
DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
<< PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 69dd813..748668c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -131,28 +131,16 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
}
-static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
- SmallVector<EVT, 4> VTs;
- SDNode *GlueDestNode = Glue.getNode();
-
- // Don't add glue from a node to itself.
- if (GlueDestNode == N) return;
-
- // Don't add glue to something which already has glue.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
-
- for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
- if (AddGlue)
- VTs.push_back(MVT::Glue);
-
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
+ SmallVectorImpl<EVT> &VTs,
+ SDValue ExtraOper = SDValue()) {
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
- if (GlueDestNode)
- Ops.push_back(Glue);
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -171,6 +159,46 @@ static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
MN->setMemRefs(Begin, End);
}
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ SmallVector<EVT, 4> VTs;
+ for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ CloneNodeWithValues(N, DAG, VTs);
+}
+
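A minimal sketch of what RemoveUnusedGlue accomplishes, with strings standing in for value types (the real code rebuilds the node through CloneNodeWithValues rather than editing a list in place):

    #include <cassert>
    #include <string>
    #include <vector>

    // Drop the trailing glue type from a node's value list; the DAG version
    // then morphs the node to the shortened SDVTList.
    static std::vector<std::string> DropUnusedGlue(std::vector<std::string> VTs) {
      assert(!VTs.empty() && VTs.back() == "Glue" && "expected trailing glue");
      VTs.pop_back();
      return VTs;
    }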
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
@@ -238,19 +266,23 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
// Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensure they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddGlue(Lead, SDValue(0, 0), true, DAG);
-
- SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ SDValue InGlue = SDValue(0, 0);
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
- AddGlue(Load, InGlue, OutGlue, DAG);
+    // If AddGlue fails, we could leave an unused glue value. This should not
+    // cause any problems.
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
- if (OutGlue)
- InGlue = SDValue(Load, Load->getNumValues() - 1);
-
- ++LoadsClustered;
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 75940ec..5384576 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -98,12 +98,6 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU);
- /// computeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const { }
-
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 92671d1..b971b69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -14,16 +14,16 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -71,7 +71,9 @@ static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
}
}
-SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
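These defaults fit the new registration scheme: listeners now link themselves into the DAG rather than being threaded through every call. A hedged sketch of the assumed shape (toy types, not the SelectionDAG classes):

    struct ToyDAG;

    struct ToyListener {
      ToyDAG &DAG;
      ToyListener *Next;
      explicit ToyListener(ToyDAG &D);
      virtual ~ToyListener();
      virtual void NodeDeleted(int) {}     // default no-op, as above
      virtual void NodeUpdated(int) {}
    };

    struct ToyDAG {
      ToyListener *Listeners = nullptr;    // head of the intrusive list
      void NotifyDeleted(int N) {
        for (ToyListener *L = Listeners; L; L = L->Next)
          L->NodeDeleted(N);
      }
    };

    // Constructor registers, destructor unregisters (assuming LIFO scopes,
    // which the dangling-listener assert in ~SelectionDAG below enforces).
    ToyListener::ToyListener(ToyDAG &D) : DAG(D), Next(D.Listeners) {
      D.Listeners = this;
    }
    ToyListener::~ToyListener() { DAG.Listeners = Next; }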
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
@@ -217,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) {
return true;
}
+/// allOperandsUndef - Return true if the node has at least one operand
+/// and all operands of the specified node are ISD::UNDEF.
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+
+ return true;
+}
+
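The zero-operand carve-out is easy to model; a standalone version of the same predicate:

    #include <vector>

    enum class Op { Undef, Other };

    static bool AllOperandsUndef(const std::vector<Op> &Ops) {
      if (Ops.empty())
        return false;                      // reject the vacuous "all" on purpose
      for (Op O : Ops)
        if (O != Op::Undef)
          return false;
      return true;
    }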
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
@@ -544,16 +562,15 @@ void SelectionDAG::RemoveDeadNodes() {
/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
-void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
// Process the worklist, deleting the nodes and adding their uses to the
// worklist.
while (!DeadNodes.empty()) {
SDNode *N = DeadNodes.pop_back_val();
- if (UpdateListener)
- UpdateListener->NodeDeleted(N, 0);
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, 0);
// Take the node out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
@@ -574,7 +591,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
}
}
-void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+void SelectionDAG::RemoveDeadNode(SDNode *N){
SmallVector<SDNode*, 16> DeadNodes(1, N);
// Create a dummy node that adds a reference to the root node, preventing
@@ -582,7 +599,7 @@ void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
// dead node.)
HandleSDNode Dummy(getRoot());
- RemoveDeadNodes(DeadNodes, UpdateListener);
+ RemoveDeadNodes(DeadNodes);
}
void SelectionDAG::DeleteNode(SDNode *N) {
@@ -684,8 +701,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
/// node. This transfer can potentially trigger recursive merging.
///
void
-SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
- DAGUpdateListener *UpdateListener) {
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
// already exists.
if (!doNotCSE(N)) {
@@ -694,20 +710,19 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
- ReplaceAllUsesWith(N, Existing, UpdateListener);
+ ReplaceAllUsesWith(N, Existing);
- // N is now dead. Inform the listener if it exists and delete it.
- if (UpdateListener)
- UpdateListener->NodeDeleted(N, Existing);
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
DeleteNodeNotInCSEMaps(N);
return;
}
}
- // If the node doesn't already exist, we updated it. Inform a listener if
- // it exists.
- if (UpdateListener)
- UpdateListener->NodeUpdated(N);
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
@@ -855,7 +870,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), Ordering(0) {
+ Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
DbgInfo = new SDDbgInfo();
@@ -867,6 +882,7 @@ void SelectionDAG::init(MachineFunction &mf) {
}
SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
allnodes_clear();
delete Ordering;
delete DbgInfo;
@@ -1949,6 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
return;
}
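The added line restores the invariant that a bit is never simultaneously known-zero and known-one. A standalone model of the fix (plain integers instead of APInt):

    #include <cassert>
    #include <cstdint>

    struct Known32 { uint32_t Zero = 0, One = 0; };

    // After forcing the bits outside InMask to zero, filter the known-one
    // mask so the two sets stay disjoint; this is the effect of the new line.
    static Known32 ClampToMask(Known32 K, uint32_t InMask) {
      K.Zero |= ~InMask;
      K.One  &= ~K.Zero;
      assert((K.Zero & K.One) == 0 && "conflicting known bits");
      return K;
    }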
case ISD::FGETSIGN:
@@ -2246,8 +2263,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
}
// Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
default: break;
@@ -2675,6 +2691,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS:
+ // Concat of UNDEFs is UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF &&
+ N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
@@ -3708,8 +3729,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMCPY),
/*isTailCall=*/false,
@@ -3717,6 +3738,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
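The same CallLoweringInfo pattern repeats below for memmove and memset: gather the old parameter list into one descriptor, then hand it to LowerCallTo. A toy reduction of the pattern (invented fields; the real struct carries far more state):

    #include <string>
    #include <utility>

    struct ToyCallInfo {                   // stands in for CallLoweringInfo
      std::string Callee;
      bool IsTailCall = false;
      unsigned NumFixedArgs = 0;
    };

    static std::pair<int, int> LowerCallTo(const ToyCallInfo &CLI) {
      // ...emit the call using only fields of CLI...
      return {0, (int)CLI.NumFixedArgs};
    }

    int main() {
      ToyCallInfo CLI{"memcpy", /*IsTailCall=*/false, /*NumFixedArgs=*/3};
      std::pair<int, int> CallResult = LowerCallTo(CLI);  // two-step shape
      (void)CallResult;
    }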
@@ -3761,8 +3784,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
/*isTailCall=*/false,
@@ -3770,6 +3793,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
@@ -3822,8 +3847,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.isSExt = false;
Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMSET),
/*isTailCall=*/false,
@@ -3831,6 +3856,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
@@ -4654,13 +4681,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
continue;
- bool NoMatch = false;
- for (unsigned i = 2; i != NumVTs; ++i)
- if (VTs[i] != I->VTs[i]) {
- NoMatch = true;
- break;
- }
- if (!NoMatch)
+ if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
return *I;
}
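The replaced loop was an explicit mismatch scan; std::equal expresses the same tail comparison directly. A minimal illustration:

    #include <algorithm>

    // The first two slots were already compared above, so only compare the
    // tail [2, N) of both arrays, exactly what the new call does.
    static bool TailsMatch(const int *A, const int *B, unsigned N) {
      return std::equal(&A[2], &A[N], &B[2]);
    }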
@@ -5237,11 +5258,7 @@ namespace {
/// pointed to by a use iterator is deleted, increment the use iterator
/// so that it doesn't dangle.
///
-/// This class also manages a "downlink" DAGUpdateListener, to forward
-/// messages to ReplaceAllUsesWith's callers.
-///
class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
- SelectionDAG::DAGUpdateListener *DownLink;
SDNode::use_iterator &UI;
SDNode::use_iterator &UE;
@@ -5249,21 +5266,13 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
// Increment the iterator as needed.
while (UI != UE && N == *UI)
++UI;
-
- // Then forward the message.
- if (DownLink) DownLink->NodeDeleted(N, E);
- }
-
- virtual void NodeUpdated(SDNode *N) {
- // Just forward the message.
- if (DownLink) DownLink->NodeUpdated(N);
}
public:
- RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+ RAUWUpdateListener(SelectionDAG &d,
SDNode::use_iterator &ui,
SDNode::use_iterator &ue)
- : DownLink(dl), UI(ui), UE(ue) {}
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
}
@@ -5273,8 +5282,7 @@ public:
///
/// This version assumes From has a single result value.
///
-void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDNode *From = FromN.getNode();
assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
"Cannot replace with this method!");
@@ -5288,7 +5296,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// is replaced by To, we don't want to replace of all its users with To
// too. See PR3018 for more info.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5307,7 +5315,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5321,8 +5329,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
/// This version assumes that for each value of From, there is a
/// corresponding value in To in the same position with the same type.
///
-void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
#ifndef NDEBUG
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
assert((!From->hasAnyUseOfValue(i) ||
@@ -5337,7 +5344,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5356,7 +5363,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5369,16 +5376,14 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
///
/// This version can replace From with any result values. To must match the
/// number and types of values returned by From.
-void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
- const SDValue *To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
- return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5398,7 +5403,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5409,14 +5414,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The Deleted
/// vector is handled the same way as for ReplaceAllUsesWith.
-void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
- DAGUpdateListener *UpdateListener){
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
// Handle the really simple, really trivial case efficiently.
if (From == To) return;
// Handle the simple, trivial, case efficiently.
if (From.getNode()->getNumValues() == 1) {
- ReplaceAllUsesWith(From, To, UpdateListener);
+ ReplaceAllUsesWith(From, To);
return;
}
@@ -5424,7 +5428,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
UE = From.getNode()->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
bool UserRemovedFromCSEMaps = false;
@@ -5460,7 +5464,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5489,11 +5493,10 @@ namespace {
/// handled the same way as for ReplaceAllUsesWith.
void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
const SDValue *To,
- unsigned Num,
- DAGUpdateListener *UpdateListener){
+ unsigned Num){
// Handle the simple, trivial case efficiently.
if (Num == 1)
- return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+ return ReplaceAllUsesOfValueWith(*From, *To);
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -5538,7 +5541,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User);
}
}
@@ -5579,7 +5582,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
}
}
- // Visit all the nodes. As we iterate, moves nodes into sorted order,
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
SDNode *N = I;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 94cb958..8cbe818 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
@@ -42,7 +43,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -51,6 +51,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -843,7 +844,7 @@ void SelectionDAGBuilder::clear() {
}
/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is seperated from the clear so that debug
+/// map. This function is separated from the clear so that debug
/// information that is dangling in a basic block can be properly
/// resolved in a different basic block. This allows the
/// SelectionDAG to resolve dangling debug information attached
@@ -941,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/Instruction.def"
}
@@ -1578,17 +1579,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
} else
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
} else {
- assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+ assert(CB.CC == ISD::SETCC_INVALID &&
+ "Condition is undefined for to-the-range belonging check.");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
-
- if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
- ISD::SETLE);
+ ISD::SETULE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1826,9 +1828,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(&I);
- else
+ else if (Fn && Fn->isIntrinsic()) {
+ assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ } else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
// If the value of the invoke is used outside of its defining block, make it
@@ -1901,8 +1907,6 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
const Value* SV,
MachineBasicBlock *Default,
MachineBasicBlock *SwitchBB) {
- Case& BackCase = *(CR.Range.second-1);
-
// Size is the number of Cases represented by this range.
size_t Size = CR.Range.second - CR.Range.first;
if (Size > 3)
@@ -1970,11 +1974,28 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
}
+ // Order cases by weight so the most likely case will be checked first.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI) {
+ for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
+ uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
+ I->BB->getBasicBlock());
+ for (CaseItr J = CR.Range.first; J < I; ++J) {
+ uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
+ J->BB->getBasicBlock());
+ if (IWeight > JWeight)
+ std::swap(*I, *J);
+ }
+ }
+ }
// Rearrange the case blocks so that the last one falls through if possible.
- if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ Case &BackCase = *(CR.Range.second-1);
+ if (Size > 1 &&
+ NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
// The last case block won't fall through into 'NextBlock' if we emit the
// branches in this order. See if rearranging a case value would help.
- for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ // We start at the bottom as it's the case with the least weight.
+ for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
if (I->BB == NextBlock) {
std::swap(*I, BackCase);
break;
@@ -2006,7 +2027,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CC = ISD::SETEQ;
LHS = SV; RHS = I->High; MHS = NULL;
} else {
- CC = ISD::SETLE;
+ CC = ISD::SETCC_INVALID;
LHS = I->Low; MHS = SV; RHS = I->High;
}
@@ -2031,14 +2052,14 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !TLI.getTargetMachine().Options.DisableJumpTables &&
+ return TLI.supportJumpTables() &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+ APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
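Switching sext to zext (together with the sle/slt comparisons becoming ule/ult elsewhere in this patch) keeps the range math consistent once case values are treated as unsigned. A small demonstration of the difference:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t First = 0x01, Last = 0xFF;   // raw case-value bits
      // Signed view: (int8_t)0xFF == -1, so the "range" collapses.
      long long SRange = (long long)(int8_t)Last - (long long)(int8_t)First + 1;
      // Unsigned view, matching the zext rewrite:
      unsigned long long URange = (unsigned long long)Last - First + 1;
      std::printf("%lld vs %llu\n", SRange, URange);     // -1 vs 255
    }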
@@ -2104,7 +2125,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
const APInt &High = cast<ConstantInt>(I->High)->getValue();
- if (Low.sle(TEI) && TEI.sle(High)) {
+ if (Low.ule(TEI) && TEI.ule(High)) {
DestBBs.push_back(I->BB);
if (TEI==High)
++I;
@@ -2261,7 +2282,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Create a CaseBlock record representing a conditional branch to
// the LHS node if the value being switched on SV is less than C.
// Otherwise, branch to LHS.
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+ CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
if (CR.CaseBB == SwitchBB)
visitSwitchCase(CB, SwitchBB);
@@ -2333,7 +2354,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+ if (maxValue.ult(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -2407,57 +2428,46 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
/// Clusterify - Transform simple list of Cases into list of CaseRange's
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
- size_t numCmps = 0;
+
+ /// Use a shorter form of declaration, and also
+  /// show that we want to use CRSBuilder as Clusterifier.
+ typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+
+ Clusterifier TheClusterifier;
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
-
- Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
- SMBB, ExtraWeight));
- }
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
- // Merge case into clusters
- if (Cases.size() >= 2)
- // Must recompute end() each iteration because it may be
- // invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
- J != Cases.end(); ) {
- const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
- const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
- MachineBasicBlock* nextBB = J->BB;
- MachineBasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
- I->High = J->High;
- J = Cases.erase(J);
-
- if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
- uint32_t CurWeight = currentBB->getBasicBlock() ?
- BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
- uint32_t NextWeight = nextBB->getBasicBlock() ?
- BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
-
- BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
- CurWeight + NextWeight);
- }
- } else {
- I = J++;
- }
+ TheClusterifier.add(i.getCaseValueEx(), SMBB);
+ }
+
+ TheClusterifier.optimize();
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ size_t numCmps = 0;
+ for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+ e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+ Clusterifier::Cluster &C = *i;
+ unsigned W = 0;
+ if (BPI) {
+ W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
+ if (!W)
+ W = 16;
+ W *= C.first.Weight;
+ BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
+    // FIXME: Currently works with ConstantInt-based numbers.
+    // Changing it to be APInt-based is too heavy a change for this commit.
+ Cases.push_back(Case(C.first.getLow().toConstantInt(),
+ C.first.getHigh().toConstantInt(), C.second, W));
+
+ if (C.first.getLow() != C.first.getHigh())
+ // A range counts double, since it requires two compares.
+ ++numCmps;
}
return numCmps;
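The old in-place merge loop and the new IntegersSubsetMapping path both boil down to coalescing adjacent case values that share a successor. A standalone model of that clustering (toy types; edge weights omitted):

    #include <cstdio>
    #include <map>
    #include <vector>

    struct ToyCluster { int Lo, Hi, DestBB; };

    static std::vector<ToyCluster> Clusterify(const std::map<int, int> &Cases) {
      std::vector<ToyCluster> Out;
      for (std::map<int, int>::const_iterator I = Cases.begin(),
           E = Cases.end(); I != E; ++I) {
        if (!Out.empty() && Out.back().DestBB == I->second &&
            Out.back().Hi + 1 == I->first)
          Out.back().Hi = I->first;        // extend the existing range
        else
          Out.push_back({I->first, I->first, I->second});
      }
      return Out;
    }

    int main() {
      std::map<int, int> Cases;
      Cases[1] = 10; Cases[2] = 10; Cases[3] = 10; Cases[7] = 20;
      std::vector<ToyCluster> Cs = Clusterify(Cases);
      for (unsigned i = 0; i != Cs.size(); ++i)
        std::printf("[%d, %d] -> BB%d\n", Cs[i].Lo, Cs[i].Hi, Cs[i].DestBB);
      // prints: [1, 3] -> BB10 and [7, 7] -> BB20
    }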
@@ -2804,7 +2814,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
}
// Utility for visitShuffleVector - Return true if every element in Mask,
-// begining from position Pos and ending in Pos+Size, falls within the
+// beginning from position Pos and ending in Pos+Size, falls within the
// specified sequential range [L, L+Size), or is undef.
static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
unsigned Pos, unsigned Size, int Low) {
@@ -4914,6 +4924,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::pow:
visitPow(I);
return 0;
+ case Intrinsic::fabs:
+ setValue(&I, DAG.getNode(ISD::FABS, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4921,6 +4936,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return 0;
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isOperationLegal(ISD::FMA, VT) &&
+ TLI.isFMAFasterThanMulAndAdd(VT)){
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return 0;
+ }
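The fmuladd branch reduces to one decision: fuse when fusion is not restricted to Strict mode and the target reports FMA as both legal and profitable, otherwise keep the unfused pair. A hedged scalar model (toy flags stand in for the TLI queries):

    #include <cmath>

    static const bool FMAIsLegal  = true;  // TLI.isOperationLegal(ISD::FMA, VT)
    static const bool FMAIsFaster = true;  // TLI.isFMAFasterThanMulAndAdd(VT)

    static float LowerFMulAdd(float A, float B, float C) {
      if (FMAIsLegal && FMAIsFaster)
        return std::fma(A, B, C);          // fused: one rounding
      return A * B + C;                    // unfused: two roundings
    }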
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
MVT::i16, getValue(I.getArgOperand(0))));
@@ -5050,7 +5088,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::gcroot:
if (GFI) {
- const Value *Alloca = I.getArgOperand(0);
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
@@ -5077,16 +5115,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> Result =
- TLI.LowerCallTo(getRoot(), I.getType(),
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return 0;
}
+ case Intrinsic::debugtrap: {
+    DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl, MVT::Other, getRoot()));
+ return 0;
+ }
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
@@ -5139,6 +5182,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::lifetime_end:
// Discard region information.
return 0;
+ case Intrinsic::donothing:
+ // ignore
+ return 0;
}
}
@@ -5157,14 +5203,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- SmallVector<uint64_t, 4> Offsets;
GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- Outs, TLI, &Offsets);
+ Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- DAG.getMachineFunction(),
- FTy->isVarArg(), Outs,
- FTy->getContext());
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
@@ -5247,16 +5292,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
- std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), RetTy,
- CS.paramHasAttr(0, Attribute::SExt),
- CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
- CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
- CS.getCallingConv(),
- isTailCall,
- CS.doesNotReturn(),
- !CS.getInstruction()->use_empty(),
- Callee, Args, DAG, getCurDebugLoc());
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
+ getCurDebugLoc(), CS);
+ std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -5272,7 +5311,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
- unsigned NumValues = Outs.size();
+
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);
+
+ unsigned NumValues = RetTys.size();
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
@@ -5280,8 +5325,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add,
+ SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
false, false, false, 1);
Values[i] = L;
@@ -5292,30 +5336,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
- // Collect the legal value parts into potentially illegal values
- // that correspond to the original function's return values.
- SmallVector<EVT, 4> RetTys;
- RetTy = FTy->getReturnType();
- ComputeValueVTs(TLI, RetTy, RetTys);
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
- SmallVector<SDValue, 4> ReturnValues;
- unsigned CurReg = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
- SDValue ReturnValue =
- getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
- RegisterVT, VT, AssertOp);
- ReturnValues.push_back(ReturnValue);
- CurReg += NumRegs;
- }
-
setValue(CS.getInstruction(),
DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
DAG.getVTList(&RetTys[0], RetTys.size()),
- &ReturnValues[0], ReturnValues.size()));
+ &Values[0], Values.size()));
}
// Assign order to nodes here. If the call does not produce a result, it won't
@@ -5952,11 +5976,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -6225,8 +6249,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
- assert(!OpInfo.isIndirect &&
- "Don't know how to handle indirect register inputs yet!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "Don't know how to handle indirect register inputs yet "
+ "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
@@ -6369,24 +6400,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
- bool RetSExt, bool RetZExt, bool isVarArg,
- bool isInreg, unsigned NumFixedArgs,
- CallingConv::ID CallConv, bool isTailCall,
- bool doesNotRet, bool isReturnValueUsed,
- SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG,
- DebugLoc dl) const {
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle all of the outgoing arguments.
- SmallVector<ISD::OutputArg, 32> Outs;
- SmallVector<SDValue, 32> OutVals;
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ ArgListTy &Args = CLI.Args;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -6419,8 +6444,8 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT PartVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+ EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -6429,89 +6454,88 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
PartVT, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
- i < NumFixedArgs);
+ i < CLI.NumFixedArgs);
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
- Outs.push_back(MyFlags);
- OutVals.push_back(Parts[j]);
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
}
}
}
// Handle the incoming return values from the call.
- SmallVector<ISD::InputArg, 32> Ins;
+ CLI.Ins.clear();
SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, RetTy, RetTys);
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT.getSimpleVT();
- MyFlags.Used = isReturnValueUsed;
- if (RetSExt)
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
MyFlags.Flags.setSExt();
- if (RetZExt)
+ if (CLI.RetZExt)
MyFlags.Flags.setZExt();
- if (isInreg)
+ if (CLI.IsInReg)
MyFlags.Flags.setInReg();
- Ins.push_back(MyFlags);
+ CLI.Ins.push_back(MyFlags);
}
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
- Outs, OutVals, Ins, dl, DAG, InVals);
+ CLI.Chain = LowerCall(CLI, InVals);
// Verify that the target's LowerCall behaved as expected.
- assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
- assert((!isTailCall || InVals.empty()) &&
+ assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
- assert((isTailCall || InVals.size() == Ins.size()) &&
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
- if (isTailCall) {
- DAG.setRoot(Chain);
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
- DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (RetSExt)
+ if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
- else if (RetZExt)
+ else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
SmallVector<SDValue, 4> ReturnValues;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT,
AssertOp));
CurReg += NumRegs;
@@ -6521,12 +6545,12 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
// such a node, so we just return a null return value in that case. In
// that case, nothing will actually look at the value.
if (ReturnValues.empty())
- return std::make_pair(SDValue(), Chain);
+ return std::make_pair(SDValue(), CLI.Chain);
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&RetTys[0], RetTys.size()),
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
&ReturnValues[0], ReturnValues.size());
- return std::make_pair(Res, Chain);
+ return std::make_pair(Res, CLI.Chain);
}
void TargetLowering::LowerOperationWrapper(SDNode *N,
@@ -6746,7 +6770,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index.
if (FrameIndexSDNode *FI =
- dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
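[Editor's note, illustrative and not part of the patch: this hunk folds the long LowerCallTo/LowerCall parameter list into a single CallLoweringInfo ("CLI") aggregate. A hypothetical target override would now pull its state from CLI's fields; every field named below (DAG, DL, Chain, IsTailCall, Outs, OutVals, Ins) is visible in the hunk above, while MyTargetLowering is a placeholder.]

    SDValue MyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                        SmallVectorImpl<SDValue> &InVals) const {
      SelectionDAG &DAG = CLI.DAG;       // the DAG being built
      DebugLoc dl = CLI.DL;              // location for any new nodes
      if (CLI.IsTailCall) { /*...*/ }    // flags travel inside CLI as well
      // Lower CLI.Outs/CLI.OutVals; fill InVals to match CLI.Ins.
      return CLI.Chain;
    }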
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8393b41..d0fde6f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -180,17 +180,6 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const Case &C1, const Case &C2) {
- assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
@@ -351,7 +340,7 @@ public:
void clear();
/// clearDanglingDebugInfo - Clear the dangling debug information
- /// map. This function is seperated from the clear so that debug
+ /// map. This function is separated from the clear so that debug
/// information that is dangling in a basic block can be properly
/// resolved in a different basic block. This allows the
/// SelectionDAG to resolve dangling debug information attached
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index f981afb..9fc225f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ScheduleDAGSDNodes.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Assembly/Writer.h"
@@ -19,7 +20,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -265,6 +265,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKSAVE: return "stacksave";
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
// Bit manipulation
case ISD::BSWAP: return "bswap";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 605509b..287c679 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -14,12 +14,8 @@
#define DEBUG_TYPE "isel"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@@ -27,7 +23,10 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -263,8 +263,6 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-void SelectionDAGISel::ISelUpdater::anchor() { }
-
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
@@ -451,9 +449,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
}
}
- done:;
}
+ done:
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
@@ -468,8 +466,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
for (;;) {
- DenseMap<unsigned, unsigned>::iterator J =
- FuncInfo->RegFixups.find(To);
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E) break;
To = J->second;
}
@@ -703,6 +700,25 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->clear();
}
+namespace {
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+} // end anonymous namespace
+
void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
@@ -719,9 +735,13 @@ void SelectionDAGISel::DoInstructionSelection() {
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
- ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ SelectionDAG::allnodes_iterator ISelPosition(CurDAG->getRoot().getNode());
++ISelPosition;
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and proceeding back toward the beginning (the entry
@@ -748,10 +768,8 @@ void SelectionDAGISel::DoInstructionSelection() {
// If after the replacement this node is not used any more,
// remove this dead node.
- if (Node->use_empty()) { // Don't delete EntryToken, etc.
- ISelUpdater ISU(ISelPosition);
- CurDAG->RemoveDeadNode(Node, &ISU);
- }
+ if (Node->use_empty()) // Don't delete EntryToken, etc.
+ CurDAG->RemoveDeadNode(Node);
}
CurDAG->setRoot(Dummy.getValue());
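[Editor's note, illustrative: DAGUpdateListener now registers itself in its base-class constructor (note the SelectionDAG::DAGUpdateListener(DAG) initializer in ISelUpdater above), so an RAII scope replaces the explicit &ISU arguments that RemoveDeadNode and friends used to take.]

    {
      ISelUpdater ISU(*CurDAG, ISelPosition); // registers with the DAG here
      CurDAG->RemoveDeadNode(Node);           // ISU.NodeDeleted fires automatically
    }                                         // deregisters on destruction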
@@ -1680,8 +1698,6 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
- ISelUpdater ISU(ISelPosition);
-
// Now that all the normal results are replaced, we replace the chain and
// glue results if present.
if (!ChainNodesMatched.empty()) {
@@ -1705,7 +1721,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
@@ -1728,7 +1744,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
"Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputGlue, &ISU);
+ InputGlue);
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
@@ -1738,7 +1754,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
}
if (!NowDeadNodes.empty())
- CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1759,7 +1775,7 @@ enum ChainResult {
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
static ChainResult
-WalkChainUsers(SDNode *ChainedNode,
+WalkChainUsers(const SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
@@ -1992,14 +2008,14 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SelectionDAGISel &SDISel) {
+ const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SelectionDAGISel &SDISel, SDNode *N) {
+ const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
@@ -2062,7 +2078,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, SelectionDAGISel &SDISel) {
+ SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2075,7 +2091,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, SelectionDAGISel &SDISel) {
+ SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2094,7 +2110,8 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
- bool &Result, SelectionDAGISel &SDISel,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
@@ -2759,9 +2776,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
(SDNode*) 0));
}
- } else {
+ } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
+ } else {
+ // NodeToMatch was eliminated by CSE when the target changed the DAG.
+ // We will visit the equivalent node later.
+ DEBUG(dbgs() << "Node was eliminated by CSE\n");
+ return 0;
}
// If the node had chain/glue results, update our notion of the current
@@ -2959,6 +2981,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getFunction()->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
unsigned iid =
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 6cde05a..173ffac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -13,13 +13,13 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e341e15..dff9b2c 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,13 +33,6 @@
#include <cctype>
using namespace llvm;
-/// We are in the process of implementing a new TypeLegalization action
-/// - the promotion of vector elements. This feature is disabled by default
-/// and only enabled using this flag.
-static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
- cl::desc("Allow promotion of integer vector element types"));
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
@@ -521,8 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
- mayPromoteElements(AllowPromoteIntElem) {
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -604,6 +597,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
IntDivIsCheap = false;
Pow2DivIsCheap = false;
JumpIsExpensive = false;
+ predictableSelectIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -618,6 +612,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
MinStackArgumentAlignment = 1;
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
+ SupportJumpTables = true;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -708,42 +703,34 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
return false;
}
-/// hasLegalSuperRegRegClasses - Return true if the specified register class
-/// has one or more super-reg register classes that are legal.
-bool
-TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
- if (*RC->superregclasses_begin() == 0)
- return false;
- for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
- E = RC->superregclasses_end(); I != E; ++I) {
- const TargetRegisterClass *RRC = *I;
- if (isLegalRC(RRC))
- return true;
- }
- return false;
-}
-
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
- for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
- E = RC->superregclasses_end(); I != E; ++I) {
- const TargetRegisterClass *RRC = *I;
- if (RRC->isASubClass() || !isLegalRC(RRC))
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
continue;
- if (!hasLegalSuperRegRegClasses(RRC))
- return std::make_pair(RRC, 1);
- BestRC = RRC;
+ BestRC = SuperRC;
}
return std::make_pair(BestRC, 1);
}
-
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
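[Editor's note, a minimal sketch with made-up numbers: the rewritten findRepresentativeClass above relies on two BitVector idioms, setBitsInMask to OR in a packed register-class mask word and find_first/find_next to walk the resulting set bits.]

    BitVector SuperRegRC(8);            // pretend target with 8 register classes
    const uint32_t Mask[] = { 0xA2 };   // 0b10100010: classes 1, 5, 7
    SuperRegRC.setBitsInMask(Mask);
    for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i))
      ;                                 // visits class IDs 1, 5, 7 in order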
@@ -835,11 +822,8 @@ void TargetLowering::computeRegisterProperties() {
unsigned NElts = VT.getVectorNumElements();
if (NElts != 1) {
bool IsLegalWiderType = false;
- // If we allow the promotion of vector elements using a flag,
- // then return TypePromoteInteger on vector elements.
// First try to promote the elements of integer vectors. If no legal
// promotion was found, fallback to the widen-vector method.
- if (mayPromoteElements)
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
// Promote vectors of integers to vectors with the same number
@@ -940,9 +924,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
// If there is a wider vector type with the same element type as this one,
- // we should widen to that legal vector type. This handles things like
- // <2 x float> -> <4 x float>.
- if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
+ // or a promoted vector type with the same number of elements, each of them
+ // wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
RegisterVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterVT)) {
IntermediateVT = RegisterVT;
@@ -1000,13 +987,11 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
- const TargetLowering &TLI,
- SmallVectorImpl<uint64_t> *Offsets) {
+ const TargetLowering &TLI) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, ReturnType, ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
- unsigned Offset = 0;
for (unsigned j = 0, f = NumValues; j != f; ++j) {
EVT VT = ValueVTs[j];
@@ -1029,8 +1014,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
- unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
- PartVT.getTypeForEVT(ReturnType->getContext()));
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -1045,10 +1028,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
- if (Offsets) {
- Offsets->push_back(Offset);
- Offset += PartSize;
- }
}
}
}
@@ -2019,7 +1998,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Make sure we're not loosing bits from the constant.
+ // Make sure we're not losing bits from the constant.
if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
@@ -2343,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
}
if (isa<ConstantFPSDNode>(N0.getNode())) {
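[Editor's note, worked instances of the two folds added above, assuming a target where the original constant fails isLegalICmpImmediate:]

    (X & -256) == 256   ->  (X >> 8) == 1     // equality form: ShiftBits = 8,
                                              // CmpRHS = 256 >> 8 = 1
    X ult 0x100000000   ->  (X >> 32) ult 1   // range form: ShiftBits = 32
    X ule 0x0ffffffff   ->  (X >> 32) ult 1   // AdjOne: C1+1, SETULE -> SETULT

Both shapes trade an illegal compare immediate for a shift plus a small, legal one.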
@@ -2411,21 +2439,28 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
// We can always fold X == X for integer setcc's.
if (N0.getValueType().isInteger()) {
- switch (getBooleanContents(N0.getValueType().isVector())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
- case ZeroOrNegativeOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
- }
+ return DAG.getConstant(EqVal, VT);
}
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ return DAG.getConstant(EqVal, VT);
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(UOF, VT);
+ return DAG.getConstant(EqVal, VT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
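[Editor's note, worked reading: EqVal is the constant that a "true when equal" comparison should fold to, and hoisting it above the integer/FP branches makes both respect the target's boolean representation as reported by getBooleanContents.]

    ZeroOrOneBooleanContent / Undefined:  true folds to 1, false to 0
    ZeroOrNegativeOneBooleanContent:      true folds to -1 (all bits set)

This is what the sext(setcc()) => setcc() optimization mentioned above depends on: the folded constant must already be in the sign-extended form.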
@@ -2998,10 +3033,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 0016047..8a6b120 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -26,13 +26,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "shadowstackgc"
-#include "llvm/CodeGen/GCs.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCs.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 9a86f32..980bd74 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,28 +13,28 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sjljehprepare"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <set>
using namespace llvm;
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 26cf259..c8c3fb3 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -62,7 +62,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
assert(mi2iMap.empty() &&
"MachineInstr -> Index mapping non-empty at initial numbering?");
- functionSize = 0;
unsigned index = 0;
MBBRanges.resize(mf->getNumBlockIDs());
idx2MBBMap.reserve(mf->size());
@@ -89,8 +88,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
// Save this base index in the maps.
mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
SlotIndex::Slot_Block)));
-
- ++functionSize;
}
// We insert one blank instructions between basic blocks.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 6f33f54..320128a 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -207,6 +207,17 @@ void SpillPlacement::activate(unsigned n) {
return;
ActiveNodes->set(n);
nodes[n].clear();
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that 1/32 of the
+ // connected blocks need to be interested before we consider expanding the
+ // region through the bundle. This helps compile time by limiting the number
+ // of blocks visited and the number of links in the Hopfield network.
+ if (bundles->getBlocks(n).size() > 100)
+ nodes[n].Bias = -0.0625f;
}
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 9959f74..9a751c1 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -345,9 +345,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Values.clear();
// Reset the LiveRangeCalc instances needed for this spill mode.
- LRCalc[0].reset(&VRM.getMachineFunction());
+ LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
if (SpillMode)
- LRCalc[1].reset(&VRM.getMachineFunction());
+ LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
@@ -924,11 +926,9 @@ bool SplitEditor::transferValues() {
DEBUG(dbgs() << '\n');
}
- LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT,
- &LIS.getVNInfoAllocator());
+ LRCalc[0].calculateValues();
if (SpillMode)
- LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT,
- &LIS.getVNInfoAllocator());
+ LRCalc[1].calculateValues();
return Skipped;
}
@@ -953,8 +953,7 @@ void SplitEditor::extendPHIKillRanges() {
if (Edit->getParent().liveAt(LastUse)) {
assert(RegAssign.lookup(LastUse) == RegIdx &&
"Different register assignment in phi predecessor");
- LRC.extend(LI, End,
- LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator());
+ LRC.extend(LI, End);
}
}
}
@@ -1004,8 +1003,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getRegSlot(true);
- getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
- &MDT, &LIS.getVNInfoAllocator());
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
}
}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 1e940b1..20da36e 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
- bool ColorWithRegs;
LiveStacks* LS;
MachineFrameInfo *MFI;
const TargetInstrInfo *TII;
@@ -82,7 +81,7 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ MachineFunctionPass(ID), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 8ebfbca..a813fa6 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -20,12 +20,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -57,8 +60,10 @@ namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ OwningPtr<RegScavenger> RS;
bool PreRegAlloc;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
@@ -124,9 +129,13 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
PreRegAlloc = MRI->isSSA();
+ RS.reset();
+ if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS.reset(new RegScavenger());
bool MadeChange = false;
while (TailDuplicateBlocks(MF))
@@ -272,8 +281,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
continue;
unsigned Dst = Copy->getOperand(0).getReg();
unsigned Src = Copy->getOperand(1).getReg();
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
// Copy is the only use. Do trivial copy propagation here.
MRI->replaceRegWith(Dst, Src);
Copy->eraseFromParent();
@@ -429,8 +438,10 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
AddSSAUpdateEntry(Reg, NewReg, PredBB);
} else {
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
- if (VI != LocalVRMap.end())
+ if (VI != LocalVRMap.end()) {
MO.setReg(VI->second);
+ MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
+ }
}
}
PredBB->insert(PredBB->instr_end(), NewMI);
@@ -775,6 +786,23 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Remove PredBB's unconditional branch.
TII->RemoveBranch(*PredBB);
+ if (RS && !TailBB->livein_empty()) {
+ // Update PredBB livein.
+ RS->enterBasicBlock(PredBB);
+ if (!PredBB->empty())
+ RS->forward(prior(PredBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
+ E = TailBB->livein_end(); I != E; ++I) {
+ if (!RegsLiveAtExit[*I])
+ // If a register was livein to the tail block but is not live at the
+ // end of the predecessor, it must be added to the predecessor's
+ // livein list.
+ PredBB->addLiveIn(*I);
+ }
+ }
+
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
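[Editor's note, illustrative: the livein fix-up above drives the scavenger as a forward liveness tracker. Every call used here appears in the hunk; in isolation the idiom is:]

    RegScavenger RS;
    RS.enterBasicBlock(PredBB);            // seed with PredBB's liveins
    if (!PredBB->empty())
      RS.forward(prior(PredBB->end()));    // step to the last instruction
    BitVector Live(TRI->getNumRegs());
    RS.getRegsUsed(Live, false);           // phys regs live at the block's end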
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 2beb928..a3d6771 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -501,6 +501,14 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
return new ScheduleHazardRecognizer();
}
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
@@ -509,6 +517,10 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
int
TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
@@ -537,3 +549,199 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
}
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI) const {
+ if (DefMI->mayLoad())
+ return ItinData->SchedModel->LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return ItinData->SchedModel->HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfoImpl::
+getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI->mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfoImpl::
+getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+static int computeDefOperandLatency(
+ const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return TII->getInstrLatency(ItinData, DefMI);
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->SchedModel->MinLatency >= 0)
+ return TII->getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+ // For empty itineraries, short-circuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+ }
+ else if (ItinData->isEmpty())
+ return TII->defaultDefLatency(ItinData, DefMI);
+
+ // ...operand lookup required
+ return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known.
+///
+/// FindMin may be set to get the minimum vs. expected latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ return InstrLatency;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
+/// unknown use. Depending on the subtarget's itinerary properties, this may or
+/// may not need to call getOperandLatency().
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
+/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const TargetRegisterInfo *TRI,
+ const MachineInstr *DefMI, const MachineInstr *UseMI,
+ unsigned Reg, bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ // Find the definition of the register in the defining instruction.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
+ }
+ // For all uses of the register, calculate the maximum latency.
+ int OperLatency = -1;
+
+ // If UseMI is null, it must be a scheduling barrier.
+ if (!UseMI) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ else {
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
+ OperLatency = std::max(OperLatency, UseCycle);
+ }
+ }
+ // If we found an operand latency, we're done.
+ if (OperLatency >= 0)
+ return OperLatency;
+ }
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ return InstrLatency;
+}
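[Editor's note, hedged usage sketch: a scheduler-like client would call the new entry point twice, once per latency flavor, per the doc comment above. Names other than the API itself are placeholders.]

    unsigned MinLat = TII->computeOperandLatency(ItinData, TRI, DefMI, UseMI,
                                                 Reg, /*FindMin=*/true);
    unsigned ExpLat = TII->computeOperandLatency(ItinData, TRI, DefMI, UseMI,
                                                 Reg, /*FindMin=*/false);
    // MinLat feeds scheduling groups; ExpLat feeds cost and critical path.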
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9925185..2a2fa9e 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -93,8 +93,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// N.B.: The defaults used here are not the same ones used in MC.
// We follow gcc, MC follows gas. For example, given ".section .eh_frame",
// both gas and MC will produce a section with no flags. Given
- // section(".eh_frame") gcc will produce
- // .section .eh_frame,"a",@progbits
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
if (Name.empty() || Name[0] != '.') return K;
// Some lame default implementation based on some magic section names.
@@ -349,10 +350,17 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
if (Priority == 65535)
return StaticCtorSection;
- std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (UseInitArray) {
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
}
const MCSection *
@@ -362,10 +370,35 @@ TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
if (Priority == 65535)
return StaticDtorSection;
- std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (UseInitArray) {
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_;
+ if (!UseInitArray)
+ return;
+
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
}
//===----------------------------------------------------------------------===//
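[Editor's note, illustrative assembler output under gas-style syntax: with UseInitArray enabled, a global constructor at priority 101 now lands in]

    .section .init_array.101,"aw",@init_array

[whereas the legacy path keeps emitting]

    .section .ctors.65434,"aw",@progbits    # 65434 == 65535 - 101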
@@ -379,7 +412,7 @@ emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler *Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
- unsigned GCFlags = 0;
+ unsigned ImageInfoFlags = 0;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -396,8 +429,9 @@ emitModuleFlags(MCStreamer &Streamer,
if (Key == "Objective-C Image Info Version")
VersionVal = cast<ConstantInt>(Val)->getZExtValue();
else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only")
- GCFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated")
+ ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
else if (Key == "Objective-C Image Info Section")
SectionVal = cast<MDString>(Val)->getString();
}
@@ -424,7 +458,7 @@ emitModuleFlags(MCStreamer &Streamer,
Streamer.EmitLabel(getContext().
GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
Streamer.EmitIntValue(VersionVal, 4);
- Streamer.EmitIntValue(GCFlags, 4);
+ Streamer.EmitIntValue(ImageInfoFlags, 4);
Streamer.AddBlankLine();
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c30b133..e4c0119 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -102,7 +102,7 @@ namespace {
MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
unsigned Dist);
- bool isProfitableToCommute(unsigned regB, unsigned regC,
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist);
@@ -483,32 +483,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
-/// findLocalKill - Look for an instruction below MI in the MBB that kills the
-/// specified register. Returns null if there are any other Reg use between the
-/// instructions.
-static
-MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
- MachineInstr *MI, MachineRegisterInfo *MRI,
- DenseMap<MachineInstr*, unsigned> &DistanceMap) {
- MachineInstr *KillMI = 0;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI == MI || UseMI->getParent() != MBB)
- continue;
- if (DistanceMap.count(UseMI))
- continue;
- if (!UI.getOperand().isKill())
- return 0;
- if (KillMI)
- return 0; // -O0 kill markers cannot be trusted?
- KillMI = UseMI;
- }
-
- return KillMI;
-}
-
/// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -567,7 +541,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
-TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
+ unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
@@ -604,15 +579,15 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
// %reg1026<def> = ADD %reg1024, %reg1025
// r0 = MOV %reg1026
// Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
- unsigned FromRegB = getMappedReg(regB, SrcRegMap);
- unsigned FromRegC = getMappedReg(regC, SrcRegMap);
- unsigned ToRegB = getMappedReg(regB, DstRegMap);
- unsigned ToRegC = getMappedReg(regC, DstRegMap);
- if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
- ((!FromRegC && !ToRegC) ||
- regsAreCompatible(FromRegB, ToRegC, TRI) ||
- regsAreCompatible(FromRegC, ToRegB, TRI)))
- return true;
+ unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ if (ToRegA) {
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
+ if (BComp != CComp)
+ return !BComp && CComp;
+ }
// If there is a use of regC between its last def (could be livein) and this
// instruction, then bail.
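[Editor's note, worked reading of the new test: with ToRegA the register the result is expected to end up in, BComp/CComp say whether regB/regC is compatible with it, and the rewrite commutes only when the swap strictly helps.]

    BComp  CComp   outcome
    false  true    commute (regC can reach regA, regB cannot)
    true   false   do not commute
    equal          fall through to the regC interference checks below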
@@ -904,14 +879,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -998,6 +978,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
+ if (MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
+ // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
}
}
}
@@ -1011,20 +997,11 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MBB->splice(KillPos, MBB, From, To);
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
}
@@ -1045,7 +1022,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return true; // Below MI
unsigned DefDist = DDI->second;
assert(Dist > DefDist && "Visited def already?");
- if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
return true;
}
return false;
@@ -1060,14 +1037,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -1093,6 +1075,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
continue;
if (isDefTooClose(MOReg, DI->second, MI, MBB))
return false;
+ if (MOReg == Reg && !MO.isKill())
+ return false;
Uses.insert(MOReg);
if (MO.isKill() && MOReg != Reg)
Kills.insert(MOReg);
@@ -1134,6 +1118,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
+ if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
} else {
OtherDefs.push_back(MOReg);
}
@@ -1164,19 +1151,11 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
}
@@ -1208,9 +1187,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
++NumDeletes;
- return true; // Done with this instruction.
+ DEBUG(dbgs() << "\tdeleted unused instruction.\n");
+ return true; // Done with this instruction.
}
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
+
// Check if it is profitable to commute the operands.
unsigned SrcOp1, SrcOp2;
unsigned regC = 0;
@@ -1230,7 +1213,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
+ else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
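
The commute decision extended here reduces to two triggers. A minimal sketch
under the same kill-flag assumptions as the surrounding code (helper name
hypothetical):

    // Commute when C dies at MI but B does not: the tied def A then draws
    // its value from a dying register, so the A = COPY C inserted later can
    // be coalesced away. Otherwise defer to the profitability heuristic,
    // which now also sees regA.
    static bool shouldTryCommute(bool regBKilled, bool regCKilled,
                                 bool profitableByHeuristic) {
      if (!regBKilled && regCKilled)
        return true;
      return profitableByHeuristic;
    }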
@@ -1252,9 +1235,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return true;
}
- if (TargetRegisterInfo::isVirtualRegister(regA))
- ScanUses(regA, &*mbbi, Processed);
-
if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
@@ -1298,7 +1278,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
@@ -1454,31 +1435,50 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
"two address instruction invalid");
unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (mi->getOperand(SrcIdx).isUndef()) {
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, MF))
+ MRI->constrainRegClass(DstReg, RC);
+ mi->getOperand(SrcIdx).setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi);
+ continue;
+ }
TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
}
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed)) {
+ // The tied operands have been eliminated or shifted further down the
+ // block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
+ continue;
+ }
+ }
+ }
+
// Now iterate over the information collected above.
for (TiedOperandMap::iterator OI = TiedOperands.begin(),
OE = TiedOperands.end(); OI != OE; ++OI) {
SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
- // If the instruction has a single pair of tied operands, try some
- // transformations that may either eliminate the tied operands or
- // improve the opportunities for coalescing away the register copy.
- if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
- unsigned SrcIdx = TiedPairs[0].first;
- unsigned DstIdx = TiedPairs[0].second;
-
- // If the registers are already equal, nothing needs to be done.
- if (mi->getOperand(SrcIdx).getReg() ==
- mi->getOperand(DstIdx).getReg())
- break; // Done with this instruction.
-
- if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
- Processed))
- break; // The tied operands have been eliminated.
- }
-
bool IsEarlyClobber = false;
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
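
The <undef> early-out added in this hunk deserves a standalone statement: an
undefined source carries no value, so the tied constraint is satisfied by
renaming the operand rather than inserting a copy. A sketch under the same
assumptions (helper name hypothetical; RC is whatever class the instruction
demands for the source slot, if any):

    // Rewrite "%dst = INSTR %src<undef>, ..." into
    //         "%dst = INSTR %dst<undef>, ..." with no copy emitted.
    static void rewriteUndefTiedUse(MachineInstr *MI, unsigned SrcIdx,
                                    unsigned DstIdx, MachineRegisterInfo *MRI,
                                    const TargetRegisterClass *RC) {
      unsigned DstReg = MI->getOperand(DstIdx).getReg();
      if (RC && TargetRegisterInfo::isVirtualRegister(DstReg))
        MRI->constrainRegClass(DstReg, RC); // keep DstReg legal for the slot
      MI->getOperand(SrcIdx).setReg(DstReg);
    }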
@@ -1519,8 +1519,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
#endif
// Emit a copy or rematerialize the definition.
+ bool isCopy = false;
const TargetRegisterClass *rc = MRI->getRegClass(regB);
- MachineInstr *DefMI = MRI->getVRegDef(regB);
+ MachineInstr *DefMI = MRI->getUniqueVRegDef(regB);
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
@@ -1535,10 +1536,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
} else {
BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
regA).addReg(regB);
+ isCopy = true;
}
- MachineBasicBlock::iterator prevMI = prior(mi);
// Update DistanceMap.
+ MachineBasicBlock::iterator prevMI = prior(mi);
DistanceMap.insert(std::make_pair(prevMI, Dist));
DistanceMap[mi] = ++Dist;
@@ -1551,7 +1553,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(regA) &&
+ TargetRegisterInfo::isVirtualRegister(regB))
+ MRI->constrainRegClass(regA, MRI->getRegClass(regB));
+
MO.setReg(regA);
+
+ if (isCopy)
+ // Propagate SrcRegMap.
+ SrcRegMap[regA] = regB;
}
if (AllUsesCopied) {
@@ -1587,27 +1599,32 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Schedule the source copy / remat inserted to form two-address
- // instruction. FIXME: Does it matter the distance map may not be
- // accurate after it's scheduled?
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+ // We didn't change anything if there was a single tied pair, and that
+ // pair didn't require copies.
+ if (AllUsesCopied || TiedPairs.size() > 1) {
+ MadeChange = true;
- MadeChange = true;
+ // Schedule the source copy / remat inserted to form two-address
+ // instruction. FIXME: Does it matter that the distance map may not be
+ // accurate after it's scheduled?
+ TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+ }
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
- // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
- if (mi->isInsertSubreg()) {
- // From %reg = INSERT_SUBREG %reg, %subreg, subidx
- // To %reg:subidx = COPY %subreg
- unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
- assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
- mi->getOperand(0).setSubReg(SubIdx);
- mi->RemoveOperand(1);
- mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
- }
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
}
// Clear TiedOperands here instead of at the top of the loop
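
A worked before/after for the INSERT_SUBREG conversion just above, in
MIR-style notation with operand indices in brackets (illustrative only):

    before:  %vreg0[0]<def> = INSERT_SUBREG %vreg0[1], %vreg1[2], subidx[3]
    after:   %vreg0:subidx[0]<def> = COPY %vreg1[1]

RemoveOperand(3) drops the immediate, setSubReg() moves the index onto the
def, and the newly added setIsUndef() call forwards an <undef> flag from the
super-register input before RemoveOperand(1) deletes that input.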
@@ -1694,9 +1711,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
continue;
// Check that the instructions are all in the same basic block.
- MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
- MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
- if (SrcDefMI->getParent() != DstDefMI->getParent())
+ MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
+ if (!SrcDefMI || !DstDefMI ||
+ SrcDefMI->getParent() != DstDefMI->getParent())
continue;
// If there are no other uses than copies which feed into
@@ -1832,6 +1850,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallVector<unsigned, 4> RealSrcs;
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ // Nothing needs to be inserted for <undef> operands.
+ if (MI->getOperand(i).isUndef()) {
+ MI->getOperand(i).setReg(0);
+ continue;
+ }
unsigned SrcReg = MI->getOperand(i).getReg();
unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
unsigned SubIdx = MI->getOperand(i+1).getImm();
@@ -1841,7 +1864,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineInstr *DefMI = NULL;
if (!MI->getOperand(i).getSubReg() &&
!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DefMI = MRI->getVRegDef(SrcReg);
+ DefMI = MRI->getUniqueVRegDef(SrcReg);
}
if (DefMI && DefMI->isImplicitDef()) {
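
All three getVRegDef to getUniqueVRegDef switches in this file share one
rationale: once two-address rewriting has begun breaking SSA form, a virtual
register may have several definitions, and "the" def only exists when it is
unique. A sketch of the safer idiom (helper name hypothetical):

    // Null when Reg has zero or multiple defs; exactly the cases the
    // rewritten code now skips instead of analyzing a stale def.
    static MachineInstr *getAnalyzableDef(MachineRegisterInfo *MRI,
                                          unsigned Reg) {
      return MRI->getUniqueVRegDef(Reg);
    }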
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 3bab93b..93840f0 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -18,12 +18,14 @@
#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -104,11 +106,149 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
Virt2StackSlotMap[virtReg] = SS;
}
-void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
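
With print() and dump() as members that use the cached TRI and MRI, the map
can be dumped from a debugger without plumbing a MachineFunction through.
The output has this shape (register and class names are target-specific;
the ones below are hypothetical):

    ********** REGISTER MAP **********
    [%vreg0 -> %EAX] GR32
    [%vreg7 -> fi#2] GR32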
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
+namespace {
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ void rewrite();
+ void addMBBLiveIns();
+public:
+ static char ID;
+ VirtRegRewriter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+char VirtRegRewriter::ID = 0;
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
<< MF->getFunction()->getName() << '\n');
- DEBUG(dump());
+ DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags();
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ return true;
+}
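
The ordering inside runOnMachineFunction is load-bearing, so the reasons are
worth spelling out next to a condensed restatement of the same calls:

    LIS->addKillFlags();  // consults per-vreg intervals; must precede rewrite
    addMBBLiveIns();      // needs live intervals plus the vreg->phys mapping
    rewrite();            // replaces every virtual operand with its physreg
    getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); // still needs VRM
    VRM->clearAllVirt();  // only now is the virtual-register state released
    MRI->clearVirtRegs();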
+
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
+void VirtRegRewriter::addMBBLiveIns() {
+ SmallVector<MachineBasicBlock*, 16> LiveIn;
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+
+ // Scan the segments of LI.
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
+ ++I) {
+ if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
+ }
+}
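
One step above merits a note: skipping intervals that intervalIsInOneMBB()
confirms are confined to a single block is sound because such a register is
never live across a block boundary, so it can never appear in any live-in
list. The filter reduces to:

    // Only cross-block intervals can contribute live-ins; empty and
    // single-block intervals are skipped before the segment scan.
    bool NeedsLiveIns = !LI.empty() && !LIS->intervalIsInOneMBB(LI);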
+
+void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
@@ -135,8 +275,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
- unsigned PhysReg = getPhys(VirtReg);
- assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "Instruction uses unmapped VirtReg");
assert(!Reserved.test(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
@@ -207,31 +348,3 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
if (!MRI->reg_nodbg_empty(Reg))
MRI->setPhysRegUsed(Reg);
}
-
-void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
- const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
-
- OS << "********** REGISTER MAP **********\n";
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
- OS << '[' << PrintReg(Reg, TRI) << " -> "
- << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
- << MRI.getRegClass(Reg)->getName() << "\n";
- }
- }
-
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
- OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
- << "] " << MRI.getRegClass(Reg)->getName() << "\n";
- }
- }
- OS << '\n';
-}
-
-void VirtRegMap::dump() const {
- print(dbgs());
-}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 8cac311..c320985 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -177,13 +177,6 @@ namespace llvm {
/// the specified stack slot
void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
- /// rewrite - Rewrite all instructions in MF to use only physical registers
- /// by mapping all virtual register operands to their assigned physical
- /// registers.
- ///
- /// @param Indexes Optionally remove deleted instructions from indexes.
- void rewrite(SlotIndexes *Indexes);
-
void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};