aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp2
-rw-r--r--lib/CodeGen/CMakeLists.txt1
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp233
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp11
-rw-r--r--lib/CodeGen/LiveInterval.cpp20
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp89
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp4
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp36
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp74
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp80
-rw-r--r--lib/CodeGen/MachineBranchProbabilityInfo.cpp20
-rw-r--r--lib/CodeGen/MachineCSE.cpp49
-rw-r--r--lib/CodeGen/MachineInstr.cpp165
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp86
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp2
-rw-r--r--lib/CodeGen/MachineSink.cpp17
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp1153
-rw-r--r--lib/CodeGen/MachineTraceMetrics.h341
-rw-r--r--lib/CodeGen/MachineVerifier.cpp662
-rw-r--r--lib/CodeGen/Passes.cpp12
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp108
-rw-r--r--lib/CodeGen/RegAllocFast.cpp12
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp43
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp96
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h1
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp65
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp221
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h1
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp9
-rw-r--r--lib/CodeGen/SplitKit.cpp9
-rw-r--r--lib/CodeGen/StackProtector.cpp70
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp4
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp14
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp585
42 files changed, 3210 insertions, 1136 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b48b5af..7364f42 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -220,16 +220,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
if ((GlobalValue::LinkageTypes)Linkage !=
- GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ GlobalValue::LinkOnceODRAutoHideLinkage)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d231665..d30e5bb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing dwarf compile unit.
+// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index d240389..2e189ad 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
+ MachineTraceMetrics.cpp
MachineVerifier.cpp
OcamlGC.cpp
OptimizePHIs.cpp
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 9840a40..f9347ef 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,12 +17,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "early-ifcvt"
+#include "MachineTraceMetrics.h"
#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +32,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -48,7 +51,10 @@ BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
cl::desc("Turn all knobs to 11"));
-typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector;
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
//===----------------------------------------------------------------------===//
// SSAIfConv
@@ -94,6 +100,12 @@ public:
/// equal to Tail.
bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
/// Information about each phi in the Tail block.
struct PHIInfo {
MachineInstr *PHI;
@@ -132,6 +144,12 @@ private:
/// Find a valid insertion point in Head.
bool findInsertionPoint();
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
@@ -335,11 +353,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
return false;
- // We could support additional Tail predecessors by updating phis instead of
- // eliminating them. Let's see an example where it matters first.
Tail = Succ0->succ_begin()[0];
- if (Tail->pred_size() != 2)
- return false;
// This is not a triangle.
if (Tail != Succ1) {
@@ -389,8 +403,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// Any phis in the tail block must be convertible to selects.
PHIs.clear();
- MachineBasicBlock *TPred = TBB == Tail ? Head : TBB;
- MachineBasicBlock *FPred = FBB == Tail ? Head : FBB;
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
I != E && I->isPHI(); ++I) {
PHIs.push_back(&*I);
@@ -426,24 +440,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (!findInsertionPoint())
return false;
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
return true;
}
-
-/// convertIf - Execute the if conversion after canConvertIf has determined the
-/// feasibility.
-///
-/// Any basic blocks erased will be added to RemovedBlocks.
-///
-void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
- assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
-
- // Move all instructions into Head, except for the terminators.
- if (TBB != Tail)
- Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
- if (FBB != Tail)
- Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
-
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
assert(FirstTerm != Head->end() && "No terminators");
DebugLoc HeadDL = FirstTerm->getDebugLoc();
@@ -459,6 +467,66 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
PI.PHI->eraseFromParent();
PI.PHI = 0;
}
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
// Fix up the CFG, temporarily leave Head without any successors.
Head->removeSuccessor(TBB);
@@ -470,6 +538,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
TII->RemoveBranch(*Head);
// Erase the now empty conditional blocks. It is likely that Head can fall
@@ -484,7 +553,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
}
assert(Head->succ_empty() && "Additional head successors?");
- if (Head->isLayoutSuccessor(Tail)) {
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
// Splice Tail onto the end of Head.
DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
<< " into head BB#" << Head->getNumber() << '\n');
@@ -512,9 +581,12 @@ namespace {
class EarlyIfConverter : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
SSAIfConv IfConv;
public:
@@ -527,6 +599,8 @@ private:
bool tryConvertIf(MachineBasicBlock*);
void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
};
} // end anonymous namespace
@@ -537,6 +611,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter,
"early-ifcvt", "Early If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(EarlyIfConverter,
"early-ifcvt", "Early If Converter", false, false)
@@ -546,6 +621,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -576,12 +653,117 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
Loops->removeBlock(Removed[i]);
}
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
/// Attempt repeated if-conversion on MBB, return true if successful.
///
bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool Changed = false;
- while (IfConv.canConvertIf(MBB)) {
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
// If-convert MBB and update analyses.
+ invalidateTraces();
SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
IfConv.convertIf(RemovedBlocks);
Changed = true;
@@ -597,9 +779,12 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
<< ((Value*)MF.getFunction())->getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
bool Changed = false;
IfConv.runOnMachineFunction(MF);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index b14afc2..7a17331 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
} else {
TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
}
DEBUG(dbgs() << '\n');
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 01077db..0a795e6 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
valnos.pop_back();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->setIsUnused(true);
+ ValNo->markUnused();
}
}
@@ -667,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
- // Merge the relevant flags.
- V2->mergeFlags(V1);
-
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -737,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const {
} else {
OS << vni->def;
if (vni->isPHIDef())
- OS << "-phidef";
- if (vni->hasPHIKill())
- OS << "-phikill";
+ OS << "-phi";
}
}
}
@@ -827,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
- if (MO.isUse() && MO.isUndef())
- continue;
// DBG_VALUE instructions should have been eliminated earlier.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = Idx.getRegSlot(MO.isUse());
- const VNInfo *VNI = LI.getVNInfoAt(Idx);
- // FIXME: We should be able to assert(VNI) here, but the coalescer leaves
- // dangling defs around.
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
continue;
MO.setReg(LIV[getEqClass(VNI)]->reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 819707f..d0f8ae1 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +39,13 @@
#include <cmath>
using namespace llvm;
+// Switch to the new experimental algorithm for computing live intervals.
+static cl::opt<bool>
+NewLiveIntervals("new-live-intervals", cl::Hidden,
+ cl::desc("Use new algorithm forcomputing live intervals"));
+
char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
AllocatableRegs = TRI->getAllocatableSet(fn);
ReservedRegs = TRI->getReservedRegs(fn);
- computeIntervals();
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ if (NewLiveIntervals) {
+ // This is the new way of computing live intervals.
+ // It is independent of LiveVariables, and it can run at any time.
+ computeVirtRegs();
+ computeRegMasks();
+ } else {
+ // This is the old way of computing live intervals.
+ // It depends on LiveVariables.
+ computeIntervals();
+ }
computeLiveInRegUnits();
DEBUG(dump());
@@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// new valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
// live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
@@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
ValNo = interval.getNextValue(Start, VNInfoAllocator);
- ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
@@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
llvm_unreachable("Multiply defined register");
@@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Register Unit Liveness
//===----------------------------------------------------------------------===//
@@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
- VNI->setIsUnused(true);
+ VNI->markUnused();
NewLI.removeRange(*LII);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
CanSeparate = true;
@@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
return MBB1 == MBB2 ? MBB1 : NULL;
}
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
float
LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
// Limit the loop depth ridiculousness.
@@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
- VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 9384075..d828f25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
.getRegSlot(I.getOperand().isEarlyClobber());
// Create the def in LI. This may find an existing def.
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc);
- VNI->setIsPHIDef(MI->isPHI());
+ LI->createDeadDef(Idx, *Alloc);
}
}
@@ -320,7 +319,6 @@ void LiveRangeCalc::updateSSA() {
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
- VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
I->DomNode = 0;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 896fdbf..b4ce9aa 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -239,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -246,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (!MOI->isReg())
continue;
unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ ReadsPhysRegs = true;
continue;
+ }
LiveInterval &LI = LIS.getInterval(Reg);
// Shrink read registers, unless it is likely to be expensive and
@@ -271,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
}
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
// Erase any virtregs that are now empty and unused. There may be <undef>
// uses around. Keep the empty live range in that case.
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index ecc1e95..cf13dbd 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() != 0 && "machine instruction not in a basic block");
// Remove from the use/def lists.
- N->RemoveRegOperandsFromUseLists();
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(0);
@@ -310,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
if (!succ_empty()) {
if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
- for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
OS << '\n';
}
}
@@ -477,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) {
void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
MachineBasicBlock *New) {
- uint32_t weight = 0;
- succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+ if (Old == New)
+ return;
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(SI);
- weight = *WI;
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
}
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
- // Update the successor information.
- removeSuccessor(SI);
- addSuccessor(New, weight);
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
}
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
@@ -507,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t weight = 0;
-
+ uint32_t Weight = 0;
// If Weight list is empty it means we don't use it (disabled optimization).
if (!fromMBB->Weights.empty())
- weight = *fromMBB->Weights.begin();
+ Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, weight);
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -526,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
@@ -540,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
}
}
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
- return I != Successors.end();
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
}
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -909,12 +942,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
if (Weights.empty())
return 0;
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- return *getWeightIterator(I);
+ return *getWeightIterator(Succ);
}
/// getWeightIterator - Return wight iterator corresonding to the I successor
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5a15f92..c4dca2c 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
PrevBB->updateTerminator();
+ }
}
// Fixup the last block.
@@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
- // on the loop info here so that we can align backedges in unnatural CFGs and
- // backedges that were introduced purely because of the loop rotations done
- // during this layout pass.
- // FIXME: This isn't quite right, we shouldn't align backedges that result
- // from blocks being sunken below the exit block for the function.
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
- for (BlockChain::iterator BI = FunctionChain.begin(),
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
- PreviousBlocks.insert(*BI);
- // Set alignment on the destination of all the back edges in the new
- // ordering.
- for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
- SE = (*BI)->succ_end();
- SI != SE; ++SI)
- if (PreviousBlocks.count(*SI))
- (*SI)->setAlignment(Align);
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, othe predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
}
}
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 0cc1af0..4479211 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight;
}
@@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Sum = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight / Scale;
}
assert(Sum <= UINT32_MAX);
return Sum;
}
-uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
return Weight;
}
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
MachineBasicBlock *MaxSucc = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
MaxSucc = *I;
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 9cfe9ab..896461f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -215,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (MO.isDef() &&
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- PhysRefs.insert(*AI);
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
if (MO.isDef())
PhysDefs.push_back(Reg);
}
@@ -324,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that works around the lack the live range splitting.
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
@@ -394,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -463,15 +489,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned OldReg = MO.getReg();
unsigned NewReg = CSMI->getOperand(i).getReg();
- if (OldReg == NewReg)
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
continue;
+ }
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
@@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->clearKillFlags(CSEPairs[i].second);
}
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -526,6 +566,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
Exps.push_back(MI);
}
CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
}
return Changed;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 8dada05..b166849 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -47,55 +47,6 @@ using namespace llvm;
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
-/// AddRegOperandToRegInfo - Add this register operand to the specified
-/// MachineRegisterInfo. If it is null, then the next/prev fields should be
-/// explicitly nulled out.
-void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
- assert(isReg() && "Can only add reg operand to use lists");
-
- // If the reginfo pointer is null, just explicitly null out or next/prev
- // pointers, to ensure they are not garbage.
- if (RegInfo == 0) {
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
- return;
- }
-
- // Otherwise, add this operand to the head of the registers use/def list.
- MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
- // For SSA values, we prefer to keep the definition at the start of the list.
- // we do this by skipping over the definition if it is at the head of the
- // list.
- if (*Head && (*Head)->isDef())
- Head = &(*Head)->Contents.Reg.Next;
-
- Contents.Reg.Next = *Head;
- if (Contents.Reg.Next) {
- assert(getReg() == Contents.Reg.Next->getReg() &&
- "Different regs on the same list!");
- Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
- }
-
- Contents.Reg.Prev = Head;
- *Head = this;
-}
-
-/// RemoveRegOperandFromRegInfo - Remove this register operand from the
-/// MachineRegisterInfo it is linked with.
-void MachineOperand::RemoveRegOperandFromRegInfo() {
- assert(isOnRegUseList() && "Reg operand is not on a use list");
- // Unlink this from the doubly linked list of operands.
- MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
- if (NextOp) {
- assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
- NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
- }
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
-}
-
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
@@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineInstr *MI = getParent())
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
- RemoveRegOperandFromRegInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
SmallContents.RegNo = Reg;
- AddRegOperandToRegInfo(&MF->getRegInfo());
+ MRI.addRegOperandToUseList(this);
return;
}
@@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
setReg(Reg);
}
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
- if (isReg() && getParent() && getParent()->getParent() &&
- getParent()->getParent()->getParent())
- RemoveRegOperandFromRegInfo();
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
@@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
// register's use/def lists.
- if (isReg()) {
- assert(!isEarlyClobber());
- setReg(Reg);
- } else {
- // Otherwise, change this to a register and set the reg#.
- OpKind = MO_Register;
- SmallContents.RegNo = Reg;
-
- // If this operand is embedded in a function, add the operand to the
- // register's use/def list.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- AddRegOperandToRegInfo(&MF->getRegInfo());
- }
+ if (RegInfo && isReg())
+ RegInfo->removeRegOperandFromUseList(this);
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -182,7 +151,13 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
- SubReg = 0;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
}
/// isIdenticalTo - Return true if this operand is identical to the specified
@@ -208,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_FrameIndex:
return getIndex() == Other.getIndex();
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
case MachineOperand::MO_JumpTableIndex:
return getIndex() == Other.getIndex();
@@ -245,6 +221,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
case MachineOperand::MO_FrameIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
MO.getOffset());
case MachineOperand::MO_JumpTableIndex:
@@ -353,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
case MachineOperand::MO_JumpTableIndex:
OS << "<jt#" << getIndex() << '>';
break;
@@ -650,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists() {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
- }
+ MRI.removeRegOperandFromUseList(&Operands[i]);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(&RegInfo);
- }
+ MRI.addRegOperandToUseList(&Operands[i]);
}
-
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
@@ -695,7 +674,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
if (RegInfo)
- Operands[OpNo].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
@@ -712,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
// Insert the new operand at OpNo.
Operands.insert(Operands.begin() + OpNo, Op);
@@ -723,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
// When adding a register operand, tell RegInfo about it.
if (Operands[OpNo].isReg()) {
- // Add the new operand to RegInfo, even when RegInfo is NULL.
- // This will initialize the linked list pointers.
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ Operands[OpNo].Contents.Reg.Prev = 0;
+ // Add the new operand to RegInfo.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(&Operands[OpNo]);
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
@@ -739,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (RegInfo) {
for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -749,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
+ MachineRegisterInfo *RegInfo = getRegInfo();
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
- if (Operands.back().isReg() && Operands.back().isOnRegUseList())
- Operands.back().RemoveRegOperandFromRegInfo();
+ if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
+ RegInfo->removeRegOperandFromUseList(&Operands.back());
Operands.pop_back();
return;
@@ -763,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
// Otherwise, we are removing an interior operand. If we have reginfo to
// update, remove all operands that will be shifted down from their reg lists,
// move everything down, then re-add them.
- MachineRegisterInfo *RegInfo = getRegInfo();
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
}
}
@@ -776,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 82e1235..5fb938f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// New virtual register number.
unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
-
- // Add a reg, but keep track of whether the vector reallocated or not.
- const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
- void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
VRegInfo.grow(Reg);
VRegInfo[Reg].first = RegClass;
RegAllocHints.grow(Reg);
-
- if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
- // The vector reallocated, handle this now.
- HandleVRegListReallocation();
return Reg;
}
@@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() {
VRegInfo.clear();
}
-/// HandleVRegListReallocation - We just added a virtual register to the
-/// VRegInfo info list and it reallocated. Update the use/def lists info
-/// pointers.
-void MachineRegisterInfo::HandleVRegListReallocation() {
- // The back pointers for the vreg lists point into the previous vector.
- // Update them to point to their correct slots.
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- MachineOperand *List = VRegInfo[Reg].second;
- if (!List) continue;
- // Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
}
}
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
return &*I;
}
-bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
- use_iterator UI = use_begin(RegNo);
- if (UI == use_end())
- return false;
- return ++UI == use_end();
-}
-
bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
use_nodbg_iterator UI = use_nodbg_begin(RegNo);
if (UI == use_nodbg_end())
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index acb1ee6..076547a 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
}
MachineSSAUpdater::~MachineSSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1ce546b..bc383cb 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -99,6 +99,16 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
} // end anonymous namespace
char MachineSinking::ID = 0;
@@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 0000000..1a3aa60
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1153 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ ItinData = MF->getTarget().getInstrItineraryData();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ BlockInfo.resize(MF->getNumBlockIDs());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ // FIXME: Compute per-functional unit counts.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+ }
+ FBI->InstrCount = InstrCount;
+ return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // predecessor is always computed first.
+ TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo&DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list.
+ if (I->MI)
+ DepHeight += TII->computeOperandLatency(ItinData,
+ MI, MO.getOperandNo(),
+ I->MI, I->Op);
+ else
+ // No UseMI. Just use the MI latency instead.
+ DepHeight += TII->getInstrLatency(ItinData, MI);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep,
+ const MachineInstr *UseMI, unsigned UseHeight,
+ MIHeightMap &Heights,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
+/// of all the blocks in Trace. Stop when reaching the block that contains
+/// DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.ItinData, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ // FIXME: Check cache tags, recompute as needed.
+ computeTrace(MBB);
+ computeInstrDepths(MBB);
+ computeInstrHeights(MBB);
+ return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ PHI, Dep.UseOp,
+ /* FindMin = */ false);
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // For now, we compute the resource depth from instruction count / issue
+ // width. Eventually, we should compute resource depth per functional unit
+ // and return the max.
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 0000000..c5b86f3
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,341 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block. The trace is chosen based on loop structure,
+// branch probabilities, and resource usage. The intention is to pick likely
+// traces that would be the most affected by code transformations.
+//
+// It is expensive to compute a full arbitrary trace for every block, so to
+// save some computations, traces are chosen to be convergent. This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions - If the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack - the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *ItinData;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *Loops;
+
+public:
+ class Ensemble;
+ class Trace;
+ static char ID;
+ MachineTraceMetrics();
+ void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+ void releaseMemory();
+ void verifyAnalysis() const;
+
+ friend class Ensemble;
+ friend class Trace;
+
+ /// Per-basic block information that doesn't depend on the trace through the
+ /// block.
+ struct FixedBlockInfo {
+ /// The number of non-trivial instructions in the block.
+ /// Doesn't count PHI and COPY instructions that are likely to be removed.
+ unsigned InstrCount;
+
+ /// True when the block contains calls.
+ bool HasCalls;
+
+ FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+ /// Returns true when resource information for this block has been computed.
+ bool hasResources() const { return InstrCount != ~0u; }
+
+ /// Invalidate resource information.
+ void invalidate() { InstrCount = ~0u; }
+ };
+
+ /// Get the fixed resource information about MBB. Compute it on demand.
+ const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+ /// A virtual register or regunit required by a basic block or its trace
+ /// successors.
+ struct LiveInReg {
+ /// The virtual register required, or a register unit.
+ unsigned Reg;
+
+ /// For virtual registers: Minimum height of the defining instruction.
+ /// For regunits: Height of the highest user in the trace.
+ unsigned Height;
+
+ LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ };
+
+ /// Per-basic block information that relates to a specific trace through the
+ /// block. Convergent traces means that only one of these is required per
+ /// block in a trace ensemble.
+ struct TraceBlockInfo {
+ /// Trace predecessor, or NULL for the first block in the trace.
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+ /// Minimum number of cycles from this instruction is issued to the of the
+ /// trace, as determined by data dependencies and instruction latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+ /// passes through the current basic block one. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const {
+ return TBI.InstrDepth + TBI.InstrHeight;
+ }
+
+ /// Return the resource depth of the top/bottom of the trace center block.
+ /// This is the number of cycles required to execute all instructions from
+ /// the trace head to the trace center block. The resource depth only
+ /// considers execution resources, it ignores data dependencies.
+ /// When Bottom is set, instructions in the trace center block are included.
+ unsigned getResourceDepth(bool Bottom) const;
+
+ /// Return the resource length of the trace. This is the number of cycles
+ /// required to execute the instructions in the trace if they were all
+ /// independent, exposing the maximum instruction-level parallelism.
+ ///
+ /// Any blocks in Extrablocks are included as if they were part of the
+ /// trace.
+ unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+ ArrayRef<const MachineBasicBlock*>()) const;
+
+ /// Return the length of the (data dependency) critical path through the
+ /// trace.
+ unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+ /// Return the depth and height of MI. The depth is only valid for
+ /// instructions in or above the trace center block. The height is only
+ /// valid for instructions in or below the trace center block.
+ InstrCycles getInstrCycles(const MachineInstr *MI) const {
+ return TE.Cycles.lookup(MI);
+ }
+
+ /// Return the slack of MI. This is the number of cycles MI can be delayed
+ /// before the critical path becomes longer.
+ /// MI must be an instruction in the trace center block.
+ unsigned getInstrSlack(const MachineInstr *MI) const;
+
+ /// Return the Depth of a PHI instruction in a trace center block successor.
+ /// The PHI does not have to be part of the trace.
+ unsigned getPHIDepth(const MachineInstr *PHI) const;
+ };
+
+ /// A trace ensemble is a collection of traces selected using the same
+ /// strategy, for example 'minimum resource height'. There is one trace for
+ /// every block in the function.
+ class Ensemble {
+ SmallVector<TraceBlockInfo, 4> BlockInfo;
+ DenseMap<const MachineInstr*, InstrCycles> Cycles;
+ friend class Trace;
+
+ void computeTrace(const MachineBasicBlock*);
+ void computeDepthResources(const MachineBasicBlock*);
+ void computeHeightResources(const MachineBasicBlock*);
+ unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+ void computeInstrDepths(const MachineBasicBlock*);
+ void computeInstrHeights(const MachineBasicBlock*);
+ void addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace);
+
+ protected:
+ MachineTraceMetrics &MTM;
+ virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+ virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+ explicit Ensemble(MachineTraceMetrics*);
+ const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+ public:
+ virtual ~Ensemble();
+ virtual const char *getName() const =0;
+ void print(raw_ostream&) const;
+ void invalidate(const MachineBasicBlock *MBB);
+ void verify() const;
+
+ /// Get the trace that passes through MBB.
+ /// The trace is computed on demand.
+ Trace getTrace(const MachineBasicBlock *MBB);
+ };
+
+ /// Strategies for selecting traces.
+ enum Strategy {
+ /// Select the trace through a block that has the fewest instructions.
+ TS_MinInstrCount,
+
+ TS_NumStrategies
+ };
+
+ /// Get the trace ensemble representing the given trace selection strategy.
+ /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+ /// and valid for the lifetime of the analysis pass.
+ Ensemble *getEnsemble(Strategy);
+
+ /// Invalidate cached information about MBB. This must be called *before* MBB
+ /// is erased, or the CFG is otherwise changed.
+ ///
+ /// This invalidates per-block information about resource usage for MBB only,
+ /// and it invalidates per-trace information for any trace that passes
+ /// through MBB.
+ ///
+ /// Call Ensemble::getTrace() again to update any trace handles.
+ void invalidate(const MachineBasicBlock *MBB);
+
+private:
+ // One entry per basic block, indexed by block number.
+ SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+ // One ensemble per strategy.
+ Ensemble* Ensembles[TS_NumStrategies];
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Trace &Tr) {
+ Tr.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Ensemble &En) {
+ En.print(OS);
+ return OS;
+}
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d8dece6..852c169 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -73,8 +73,10 @@ namespace {
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
BitVector regsReserved;
BitVector regsAllocatable;
@@ -117,6 +119,9 @@ namespace {
// block. This set is disjoint from regsLiveOut.
RegSet vregsRequired;
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
BBInfo() : reachable(false) {}
// Add register to vregsPassed if it belongs there. Return true if
@@ -203,6 +208,10 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
@@ -212,6 +221,10 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getName()
- << " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")";
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (void*)MBB << ')';
if (Indexes)
*OS << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg,
*OS << "\n";
}
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) {
+ report(msg, MF);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) {
+ report(msg, MBB);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
@@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() {
regsAllocatable = TRI->getAllocatableSet(*MF);
markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
}
// Does iterator point to a and b as the first two elements?
@@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
E = MBB->succ_end(); I != E; ++I) {
if ((*I)->isLandingPad())
LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -651,10 +737,10 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
@@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (MO->readsReg()) {
regsLiveInButUnused.erase(Reg);
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical", MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
+ if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
@@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
}
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -1124,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() {
const LiveInterval &LI = LiveInts->getInterval(Reg);
assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I!=E; ++I) {
- VNInfo *VNI = *I;
- const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
- if (!DefVNI) {
- if (!VNI->isUnused()) {
- report("Valno not live at def and not marked unused", MF);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
- continue;
- }
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
- if (VNI->isUnused())
- continue;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (DefVNI != VNI) {
- report("Live range at def has different valno", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
- continue;
- }
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
- if (!MBB) {
- report("Invalid definition index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
- if (VNI->isPHIDef()) {
- if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber()
- << " in " << LI << '\n';
- }
- } else {
- // Non-PHI def.
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
- if (!MI) {
- report("No instruction at def index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
- bool hasDef = false;
- bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- if (MOI->getReg() != LI.reg)
- continue;
- } else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
- !TRI->regsOverlap(LI.reg, MOI->getReg()))
- continue;
- }
- hasDef = true;
- if (MOI->isEarlyClobber())
- isEarlyClobber = true;
- }
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
- if (!hasDef) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
- // Early clobber defs begin at USE slots, but other defs must begin at
- // DEF slots.
- if (isEarlyClobber) {
- if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- } else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- }
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const VNInfo *VNI = I->valno;
- assert(VNI && "Live range has no valno");
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
- if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
- report("Foreign valno in live range", MF);
- I->print(*OS);
- *OS << " has a valno not in " << LI << '\n';
- }
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
- if (VNI->isUnused()) {
- report("Live range valno is marked unused", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
- if (!MBB) {
- report("Bad start of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
- SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
- if (I->start != MBBStartIdx && I->start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- }
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
- const MachineBasicBlock *EndMBB =
- LiveInts->getMBBFromIndex(I->end.getPrevSlot());
- if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
- // No more checks for live-out segments.
- if (I->end == LiveInts->getMBBEndIdx(EndMBB))
- continue;
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed dead phis.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
continue;
- }
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
- // The block slot must refer to a basic block boundary.
- if (I->end.isBlock()) {
- report("Live segment ends at B slot of an instruction", MI);
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
-
- if (I->end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(I->start, I->end)) {
- report("Live segment ending at dead slot spans instructions", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
-
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (I->end.isEarlyClobber()) {
- if (I+1 == E || (I+1)->start != I->end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
}
+ }
+ }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != LI.reg)
- continue;
- if (MOI->readsReg())
- hasRead = true;
- if (MOI->isDef() && MOI->isDead())
- hasDeadDef = true;
- }
-
- if (I->end.isDead()) {
- if (!hasDeadDef) {
- report("Instruction doesn't have a dead def operand", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- } else {
- if (!hasRead) {
- report("Instruction ending live range doesn't read the register",
- MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
- }
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
- // Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
- // Is this live range the beginning of a non-PHIDef VN?
- if (I->start == VNI->def && !VNI->isPHIDef()) {
- // Not live-in to any blocks.
- if (MBB == EndMBB)
- continue;
- // Skip this block.
- ++MFI;
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
}
- for (;;) {
- assert(LiveInts->isLiveInToMBB(LI, MFI));
- // We don't know how to track physregs into a landing pad.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
- MFI->isLandingPad()) {
- if (&*MFI == EndMBB)
- break;
- ++MFI;
- continue;
- }
- // Is VNI a PHI-def in the current block?
- bool IsPHI = VNI->isPHIDef() &&
- VNI->def == LiveInts->getMBBStartIdx(MFI);
-
- // Check that VNI is live-out of all predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
- PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
- const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
-
- // All predecessors must have a live-out value.
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
-
- // Only PHI-defs can take different predecessor values.
- if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
- << PrintReg(Reg) << ": " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
}
}
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
}
}
}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 69d6d00..56526f2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -88,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+// Experimental option to run live inteerval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -452,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -648,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&MachineLoopInfoID);
addPass(&PHIEliminationID);
}
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
addPass(&TwoAddressInstructionPassID);
if (EnableStrongPHIElim)
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4..9099862 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -108,12 +110,14 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
- bool First = true;
- MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
LocalMIs.insert(MI);
+ // If there exists an instruction which belongs to the following
+ // categories, we will discard the load candidate.
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
- ++MII;
+ FoldAsLoadDefReg = 0;
continue;
}
-
- if (MI->isBitcast()) {
- if (optimizeBitcastInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
- } else if (MI->isCompare()) {
- if (optimizeCmpInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
+
+ if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
}
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
@@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
- First = false;
- PMII = MII;
- ++MII;
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ continue;
+ }
+ }
}
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8325f20..6b3a48e 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
/// its virtual register, and it is guaranteed to be a block-local register.
///
bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
- // Check for non-debug uses or defs following MO.
- // This is the most likely way to fail - fast path it.
- MachineOperand *Next = &MO;
- while ((Next = Next->getNextOperandForReg()))
- if (!Next->isDebug())
- return false;
-
// If the register has ever been spilled or reloaded, we conservatively assume
// it is a global register used in multiple blocks.
if (StackSlotForVirtReg[MO.getReg()] != -1)
return false;
// Check that the use/def chain has exactly one operand - MO.
- return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
}
/// addKillFlag - Set kill flags on last use of a virtual register.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6ac5428..d0cff48 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: "
- << ((Value*)mf.getFunction())->getName() << '\n');
+ << mf.getFunction()->getName() << '\n');
MF = &mf;
if (VerifyEnabled)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 733312f..9906334 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
// Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno) {
- // If B1 is killed by a PHI, then the merged live range must also be killed
- // by the same PHI, as B0 and B1 can not overlap.
- bool HasPHIKill = BValNo->hasPHIKill();
+ if (BValNo != ValLR->valno)
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
- if (HasPHIKill)
- ValLR->valno->setHasPHIKill(true);
- }
DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
@@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
VNInfo *AValNo,
VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
-
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ if (AValNo->isPHIDef() || AValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
if (TargetRegisterInfo::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
@@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// register live range doesn't need to be accurate as long as all the
// defs are there.
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
++J;
}
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
- E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned LHSValID = LHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[LHSValID]->setHasPHIKill(true);
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
-
// Clear kill flags where live ranges are extended.
while (!LHSOldKills.empty())
LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 110f478..9c1dba3 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
- // SSA defs do not have output/anti dependencies.
+ // Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
- //
- // FIXME: This optimization is disabled pending PR13112.
- //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
- // return;
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
+ return;
// Add output dependence to the next nearest def of this vreg.
//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 747bc44..1c485a0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -228,6 +228,9 @@ namespace {
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -5679,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
@@ -5704,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
@@ -5724,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
@@ -5756,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
}
+
+ // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
}
return SDValue();
@@ -6231,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7822,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, make concat_vectors based on input element
+ // type and then bitcast it to the output vector type.
+ //
+ // In another words avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
}
// If VecIn2 is unused then change it to undef.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e5ea6e6..683fac6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -789,6 +790,17 @@ FastISel::SelectInstruction(const Instruction *I) {
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
@@ -1040,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
@@ -1049,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo)
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()) {
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
}
FastISel::~FastISel() {}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 936c126..4488d27 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -411,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
BA->getTargetFlags()));
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
+ TI->getOffset(),
+ TI->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b0776af..908ebb9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -428,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
DebugLoc dl = LD->getDebugLoc();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
- if (TLI.isTypeLegal(intVT)) {
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
@@ -436,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
- if (VT.isFloatingPoint() && LoadedVT != VT)
- Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
ValResult = Result;
ChainResult = Chain;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5384576..84e41fc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -61,6 +61,7 @@ namespace llvm {
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b971b69..f4fe892 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -403,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -438,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getTargetFlags());
break;
}
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -467,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -483,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
} // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -1100,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1199,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
@@ -2444,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -3901,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -3973,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4029,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4106,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -4225,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4314,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4381,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4405,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8cbe818..f3cf758 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
// Update successor info
addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic store");
@@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ case Intrinsic::floor:
+ setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5506,6 +5514,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
@@ -5536,150 +5560,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call.
- if (!F->hasLocalLinkage() && F->hasName()) {
- StringRef Name = F->getName();
- if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
- (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
- (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType()) {
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
- } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
- (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
- (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
- (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
- (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
return;
- }
- } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
- (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
- (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
- (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
- (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
- }
- } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
- (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
- (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
- }
- } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
- (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
- (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
- (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
- (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
- }
- } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
- (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
- (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
- (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
- (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
- }
- } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
- (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
- (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
- }
- } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
- (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
- (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
- }
- } else if (Name == "memcmp") {
+ break;
+ case LibFunc::memcmp:
if (visitMemCmpCall(I))
return;
+ break;
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d0fde6f..4090002 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -520,6 +520,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9fc225f..13cd011 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
@@ -409,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 287c679..4e5e3ba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -979,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (TM.Options.EnableFastISel)
- FastIS = TLI.createFastISel(*FuncInfo);
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dff9b2c..6820175 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
@@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC = NewC.lshr(ShiftBits);
if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
@@ -2464,7 +2464,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond)
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 9a751c1..4a2b7ec 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
// Adjust RegAssign if a register assignment is killed at VNI->def. We
// want to avoid calculating the live range of the source register if
// possible.
- AssignI.find(VNI->def.getPrevSlot());
+ AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
// If MI doesn't kill the assigned register, just leave it.
@@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
assert(ParentVNI && "Parent not live at complement def");
@@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
if (!Dom.first || Dom.second == VNI->def)
@@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
- VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 43a6ad8..a04ac3f 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -28,15 +28,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
-// SSPBufferSize - The lower bound for a buffer to be considered for stack
-// smashing protection.
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
-
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -46,7 +41,7 @@ namespace {
Function *F;
Module *M;
- DominatorTree* DT;
+ DominatorTree *DT;
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
@@ -60,6 +55,11 @@ namespace {
/// check fails.
BasicBlock *CreateFailBB();
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
bool RequiresStackProtector() const;
@@ -70,8 +70,8 @@ namespace {
}
StackProtector(const TargetLowering *tli)
: FunctionPass(ID), TLI(tli) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
@@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) {
DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
-
+
return InsertStackProtectors();
}
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has more than SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, true))
+ return true;
+
+ return false;
+}
+
/// RequiresStackProtector - Check whether or not this function needs a stack
/// protector based upon the stack protector level. The heuristic we use is to
/// add a guard variable to functions that call alloca, and functions with
@@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const {
if (!F->hasFnAttr(Attribute::StackProtect))
return false;
- const TargetData *TD = TLI->getTargetData();
-
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
@@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
- // If an array has more than SSPBufferSize bytes of allocated space,
- // then we emit stack protectors.
- if (SSPBufferSize <= TD->getTypeAllocSize(AT))
- return true;
+ if (ContainsProtectableArray(AI->getAllocatedType()))
+ return true;
}
}
@@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuardSlot = alloca i8*
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
- //
+ //
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
+
StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
PointerType::get(PtrTy, AddressSpace));
} else {
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
}
BasicBlock &Entry = F->getEntryBlock();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index c6fdc73..5b06195 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
LiveInterval &SrcInterval = LI->getInterval(SrcReg);
SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
assert(SrcVNI);
- SrcVNI->setHasPHIKill(true);
continue;
}
@@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
- DestVNI->setIsPHIDef(true);
// Prior to PHI elimination, the live ranges of PHIs begin at their defining
// instruction. After PHI elimination, PHI instructions are replaced by VNs
@@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
LI->getVNInfoAllocator());
- CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
DestCopyIndex.getRegSlot(),
CopyVNI));
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index a3d6771..ddee6b2 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
}
/// Return the default expected latency for a def based on it's opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
if (DefMI->mayLoad())
- return ItinData->SchedModel->LoadLatency;
+ return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
- return ItinData->SchedModel->HighLatency;
+ return SchedModel->HighLatency;
return 1;
}
@@ -638,7 +638,7 @@ static int computeDefOperandLatency(
return 1;
}
else if(ItinData->isEmpty())
- return TII->defaultDefLatency(ItinData, DefMI);
+ return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
// ...operand lookup required
return -1;
@@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
@@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index e4c0119..aa601af 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -30,6 +30,7 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
-STATISTIC(NumReMats, "Number of instructions re-materialized");
-STATISTIC(NumDeletes, "Number of dead instructions deleted");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -92,16 +94,9 @@ namespace {
unsigned Reg,
MachineBasicBlock::iterator OldPos);
- bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc);
-
bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
unsigned &LastDef);
- MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
- unsigned Dist);
-
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist);
@@ -117,14 +112,6 @@ namespace {
MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB, unsigned Dist);
- typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
- bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist);
- bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi, unsigned Dist);
-
bool isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI, MachineBasicBlock *MBB);
@@ -150,6 +137,11 @@ namespace {
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
@@ -167,6 +159,8 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->isTerminator())
+ KillMI == OldPos || KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
}
}
}
+ assert(KillMO && "Didn't find kill");
// Update kill and LV information.
KillMO->setIsKill(false);
@@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MBB->remove(MI);
MBB->insert(KillPos, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+
++Num3AddrSunk;
return true;
}
-/// isTwoAddrUse - Return true if the specified MI is using the specified
-/// register as a two-address operand.
-static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const MCInstrDesc &MCID = UseMI->getDesc();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg &&
- (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
- // Earlier use is a two-address one.
- return true;
- }
- return false;
-}
-
-/// isProfitableToReMat - Return true if the heuristics determines it is likely
-/// to be profitable to re-materialize the definition of Reg rather than copy
-/// the register.
-bool
-TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
- const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc) {
- bool OtherUse = false;
- for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = UseMO.getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end() && DI->second == Loc)
- continue; // Current use.
- OtherUse = true;
- // There is at least one other use in the MBB that will clobber the
- // register.
- if (isTwoAddrUse(UseMI, Reg))
- return true;
- }
- }
-
- // If other uses in MBB are not two-address uses, then don't remat.
- if (OtherUse)
- return false;
-
- // No other uses in the same block, remat if it's defined in the same
- // block so it does not unnecessarily extend the live range.
- return MBB == DefMI->getParent();
-}
-
/// NoUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
return !(LastUse > LastDef && LastUse < Dist);
}
-MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
- MachineBasicBlock *MBB,
- unsigned Dist) {
- unsigned LastUseDist = 0;
- MachineInstr *LastUse = 0;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineOperand &MO = I.getOperand();
- MachineInstr *MI = MO.getParent();
- if (MI->getParent() != MBB || MI->isDebugValue())
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
- if (DI == DistanceMap.end())
- continue;
- if (DI->second >= Dist)
- continue;
-
- if (MO.isUse() && DI->second > LastUseDist) {
- LastUse = DI->first;
- LastUseDist = DI->second;
- }
- }
- return LastUse;
-}
-
/// isCopyToReg - Return true if the specified MI is a copy instruction or
/// a extract_subreg instruction. It also returns the source and destination
/// registers and whether they are physical registers by reference.
@@ -538,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToReMat - Return true if it's potentially profitable to commute
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
@@ -628,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (LV)
// Update live variables
LV->replaceKillInstruction(RegC, MI, NewMI);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(MI, NewMI);
mbbi->insert(mi, NewMI); // Insert the new inst
mbbi->erase(mi); // Nuke the old inst.
@@ -676,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
+
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
// uses RegB, convertToThreeAddress must have created more
@@ -785,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
return;
}
-/// isSafeToDelete - If the specified instruction does not produce any side
-/// effects and all of its defs are dead, then it's safe to delete.
-static bool isSafeToDelete(MachineInstr *MI,
- const TargetInstrInfo *TII,
- SmallVector<unsigned, 4> &Kills) {
- if (MI->mayStore() || MI->isCall())
- return false;
- if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- Kills.push_back(MO.getReg());
- }
- return true;
-}
-
-/// canUpdateDeletedKills - Check if all the registers listed in Kills are
-/// killed by instructions in MBB preceding the current instruction at
-/// position Dist. If so, return true and record information about the
-/// preceding kills in NewKills.
-bool TwoAddressInstructionPass::
-canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist) {
- while (!Kills.empty()) {
- unsigned Kill = Kills.back();
- Kills.pop_back();
- if (TargetRegisterInfo::isPhysicalRegister(Kill))
- return false;
-
- MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
- if (!LastKill)
- return false;
-
- bool isModRef = LastKill->definesRegister(Kill);
- NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
- LastKill));
- }
- return true;
-}
-
-/// DeleteUnusedInstr - If an instruction with a tied register operand can
-/// be safely deleted, just delete it.
-bool
-TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned Dist) {
- // Check if the instruction has no side effects and if all its defs are dead.
- SmallVector<unsigned, 4> Kills;
- if (!isSafeToDelete(mi, TII, Kills))
- return false;
-
- // If this instruction kills some virtual registers, we need to
- // update the kill information. If it's not possible to do so,
- // then bail out.
- SmallVector<NewKill, 4> NewKills;
- if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
- return false;
-
- if (LV) {
- while (!NewKills.empty()) {
- MachineInstr *NewKill = NewKills.back().second;
- unsigned Kill = NewKills.back().first.first;
- bool isDead = NewKills.back().first.second;
- NewKills.pop_back();
- if (LV->removeVirtualRegisterKilled(Kill, mi)) {
- if (isDead)
- LV->addVirtualRegisterDead(Kill, NewKill);
- else
- LV->addVirtualRegisterKilled(Kill, NewKill);
- }
- }
- }
-
- mbbi->erase(mi); // Nuke the old inst.
- mi = nmi;
- return true;
-}
-
/// RescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
@@ -1000,6 +843,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
// Update live variables
LV->removeVirtualRegisterKilled(Reg, KillMI);
LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(MI);
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
@@ -1154,6 +999,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
// Update live variables
LV->removeVirtualRegisterKilled(Reg, KillMI);
LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(KillMI);
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
@@ -1180,16 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
-
- // If regA is dead and the instruction can be deleted, just delete
- // it so it doesn't clobber regB.
bool regBKilled = isKilled(MI, regB, MRI, TII);
- if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
- DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
- ++NumDeletes;
- DEBUG(dbgs() << "\tdeleted unused instruction.\n");
- return true; // Done with this instruction."
- }
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
@@ -1273,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (NewOpc != 0) {
const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
if (UnfoldMCID.getNumDefs() == 1) {
- MachineFunction &MF = *mbbi->getParent();
-
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF));
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -1359,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands where found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ SlotIndex CopyIdx;
+ if (Indexes)
+ CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
-bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- MRI = &MF.getRegInfo();
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
OptLevel = TM.getOptLevel();
@@ -1375,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << MF->getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs(MRI->getNumVirtRegs());
-
- typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
- TiedOperandMap;
- TiedOperandMap TiedOperands(4);
+ TiedOperandMap TiedOperands;
SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
unsigned Dist = 0;
DistanceMap.clear();
@@ -1407,50 +1400,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const MCInstrDesc &MCID = mi->getDesc();
- bool FirstTied = true;
-
DistanceMap.insert(std::make_pair(mi, ++Dist));
ProcessCopy(&*mi, &*mbbi, Processed);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- unsigned NumOps = mi->isInlineAsm()
- ? mi->getNumOperands() : MCID.getNumOperands();
- for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
- unsigned DstIdx = 0;
- if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
- continue;
-
- if (FirstTied) {
- FirstTied = false;
- ++NumTwoAddressInstrs;
- DEBUG(dbgs() << '\t' << *mi);
- }
-
- assert(mi->getOperand(SrcIdx).isReg() &&
- mi->getOperand(SrcIdx).getReg() &&
- mi->getOperand(SrcIdx).isUse() &&
- "two address instruction invalid");
-
- unsigned regB = mi->getOperand(SrcIdx).getReg();
-
- // Deal with <undef> uses immediately - simply rewrite the src operand.
- if (mi->getOperand(SrcIdx).isUndef()) {
- unsigned DstReg = mi->getOperand(DstIdx).getReg();
- // Constrain the DstReg register class if required.
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, MF))
- MRI->constrainRegClass(DstReg, RC);
- mi->getOperand(SrcIdx).setReg(DstReg);
- DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi);
- continue;
- }
- TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
}
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
// If the instruction has a single pair of tied operands, try some
// transformations that may either eliminate the tied operands or
// improve the opportunities for coalescing away the register copy.
@@ -1477,139 +1441,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// Now iterate over the information collected above.
for (TiedOperandMap::iterator OI = TiedOperands.begin(),
OE = TiedOperands.end(); OI != OE; ++OI) {
- SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
-
- bool IsEarlyClobber = false;
- bool RemovedKillFlag = false;
- bool AllUsesCopied = true;
- unsigned LastCopiedReg = 0;
- unsigned regB = OI->first;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- unsigned SrcIdx = TiedPairs[tpi].first;
- unsigned DstIdx = TiedPairs[tpi].second;
-
- const MachineOperand &DstMO = mi->getOperand(DstIdx);
- unsigned regA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
-
- // Grab regB from the instruction because it may have changed if the
- // instruction was commuted.
- regB = mi->getOperand(SrcIdx).getReg();
-
- if (regA == regB) {
- // The register is tied to multiple destinations (or else we would
- // not have continued this far), but this use of the register
- // already matches the tied destination. Leave it.
- AllUsesCopied = false;
- continue;
- }
- LastCopiedReg = regA;
-
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
-
-#ifndef NDEBUG
- // First, verify that we don't have a use of "a" in the instruction
- // (a = b + a for example) because our transformation will not
- // work. This should never occur because we are in SSA form.
- for (unsigned i = 0; i != mi->getNumOperands(); ++i)
- assert(i == DstIdx ||
- !mi->getOperand(i).isReg() ||
- mi->getOperand(i).getReg() != regA);
-#endif
-
- // Emit a copy or rematerialize the definition.
- bool isCopy = false;
- const TargetRegisterClass *rc = MRI->getRegClass(regB);
- MachineInstr *DefMI = MRI->getUniqueVRegDef(regB);
- // If it's safe and profitable, remat the definition instead of
- // copying it.
- if (DefMI &&
- DefMI->isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, AA, regB) &&
- isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
- unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
- ++NumReMats;
- } else {
- BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
- regA).addReg(regB);
- isCopy = true;
- }
-
- // Update DistanceMap.
- MachineBasicBlock::iterator prevMI = prior(mi);
- DistanceMap.insert(std::make_pair(prevMI, Dist));
- DistanceMap[mi] = ++Dist;
-
- DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
-
- MachineOperand &MO = mi->getOperand(SrcIdx);
- assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
- "inconsistent operand info for 2-reg pass");
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
-
- // Make sure regA is a legal regclass for the SrcIdx operand.
- if (TargetRegisterInfo::isVirtualRegister(regA) &&
- TargetRegisterInfo::isVirtualRegister(regB))
- MRI->constrainRegClass(regA, MRI->getRegClass(regB));
-
- MO.setReg(regA);
-
- if (isCopy)
- // Propagate SrcRegMap.
- SrcRegMap[regA] = regB;
- }
-
- if (AllUsesCopied) {
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- }
- }
- }
-
- // Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
- LV->addVirtualRegisterKilled(regB, prior(mi));
-
- } else if (RemovedKillFlag) {
- // Some tied uses of regB matched their destination registers, so
- // regB is still used in this instruction, but a kill flag was
- // removed from a different tied use of regB, so now we need to add
- // a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- MO.setIsKill(true);
- break;
- }
- }
- }
-
- // We didn't change anything if there was a single tied pair, and that
- // pair didn't require copies.
- if (AllUsesCopied || TiedPairs.size() > 1) {
- MadeChange = true;
-
- // Schedule the source copy / remat inserted to form two-address
- // instruction. FIXME: Does it matter the distance map may not be
- // accurate after it's scheduled?
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
- }
-
+ processTiedPairs(mi, OI->second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
@@ -1634,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Some remat'ed instructions are dead.
- for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
- if (MRI->use_nodbg_empty(VReg)) {
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- DefMI->eraseFromParent();
- }
- }
-
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
// SSA form. It's now safe to de-SSA.
MadeChange |= EliminateRegSequences();