author    Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
committer Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
commit    ebe69fe11e48d322045d5949c83283927a0d790b (patch)
tree      c92f1907a6b8006628a4b01615f38264d29834ea /lib/CodeGen
parent    b7d2e72b02a4cb8034f32f8247a2558d2434e121 (diff)
Update aosp/master LLVM for rebase to r230699.
Change-Id: I2b5be30509658cb8266be782de0ab24f9099f9b9
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 25
-rw-r--r--  lib/CodeGen/Analysis.cpp | 25
-rw-r--r--  lib/CodeGen/Android.mk | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/Android.mk | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 519
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 191
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 36
-rw-r--r--  lib/CodeGen/AsmPrinter/ByteStreamer.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 587
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocList.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 5
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 114
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 306
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 90
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 269
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 133
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.h | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 167
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.cpp | 59
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.h | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 72
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.cpp | 146
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.h | 4
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 50
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 632
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 3
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 8
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 69
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 1139
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 4
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 40
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 12
-rw-r--r--  lib/CodeGen/ErlangGC.cpp | 50
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 157
-rw-r--r--  lib/CodeGen/ForwardControlFlowIntegrity.cpp | 4
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 126
-rw-r--r--  lib/CodeGen/GCMetadataPrinter.cpp | 12
-rw-r--r--  lib/CodeGen/GCRootLowering.cpp | 351
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 415
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 39
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 12
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 25
-rw-r--r--  lib/CodeGen/JumpInstrTables.cpp | 6
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 34
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 26
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 1
-rw-r--r--  lib/CodeGen/LiveDebugVariables.h | 2
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 776
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 633
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 18
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 262
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 78
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 29
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 62
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 1
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 13
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 54
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 42
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp | 28
-rw-r--r--  lib/CodeGen/MachineDominanceFrontier.cpp | 2
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 34
-rw-r--r--  lib/CodeGen/MachineFunctionPass.cpp | 27
-rw-r--r--  lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 103
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 34
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 24
-rw-r--r--  lib/CodeGen/MachineRegionInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 16
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 23
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 34
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 7
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 330
-rw-r--r--  lib/CodeGen/OcamlGC.cpp | 13
-rw-r--r--  lib/CodeGen/Passes.cpp | 152
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 17
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 4
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 68
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 3
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 21
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 209
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 172
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 1301
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 4
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/Android.mk | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1719
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 62
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 150
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 89
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 11
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 38
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 197
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 224
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 435
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 36
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 128
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 679
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.h | 138
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 92
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 412
-rw-r--r--  lib/CodeGen/ShadowStackGCLowering.cpp | 457
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 2
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 53
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 3
-rw-r--r--  lib/CodeGen/StackMapLivenessAnalysis.cpp | 2
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 17
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 66
-rw-r--r--  lib/CodeGen/StatepointExampleGC.cpp | 55
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 3
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 5
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 25
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 195
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 307
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 6
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 4
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 52
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 626
140 files changed, 11193 insertions, 5917 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 91c1314..58b87e1 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -296,6 +296,16 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
+ // FIXME: We must leave subregisters of live super registers as live, so that
+ // we don't clear out the register tracking information for subregisters of
+ // super registers we're still tracking (and with which we're unioning
+ // subregister definitions).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) {
+ DEBUG(if (!header && footer) dbgs() << footer);
+ return;
+ }
+
if (!State->IsLive(Reg)) {
KillIndices[Reg] = KillIdx;
DefIndices[Reg] = ~0u;
@@ -673,6 +683,21 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
}
+ // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
+ // defines 'NewReg' via an early-clobber operand.
+ auto Range = RegRefs.equal_range(Reg);
+ for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ auto UseMI = Q->second.Operand->getParent();
+ int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+ if (Idx == -1)
+ continue;
+
+ if (UseMI->getOperand(Idx).isEarlyClobber()) {
+ DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
// Record that 'Reg' can be renamed to 'NewReg'.
RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
}
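
The (ec) bail-out above exists because an early-clobber def is written before the instruction's uses are read, so a renamed use must not land in the early-clobber register. A minimal standalone sketch of the hazard, assuming an x86-64 target and GCC/Clang inline-asm semantics (all names hypothetical, not LLVM code):

    #include <cstdio>

    int main() {
      long in = 42, out;
      // "=&r" marks 'out' early-clobber: it may be written before the inputs
      // are read. If 'out' and 'in' shared one register, the movq would
      // destroy 'in' -- precisely the rename the (ec) check refuses.
      __asm__("movq $0, %0\n\t"
              "addq %1, %0"
              : "=&r"(out)
              : "r"(in));
      std::printf("%ld\n", out); // prints 42
    }
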
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 9a3b790..e50b846 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -30,10 +30,9 @@
using namespace llvm;
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
+/// Compute the linearized index of a member in a nested aggregate/struct/array
+/// by recursing and accumulating CurIndex as long as there are indices in the
+/// index list.
unsigned llvm::ComputeLinearIndex(Type *Ty,
const unsigned *Indices,
const unsigned *IndicesEnd,
@@ -52,16 +51,23 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
}
+ assert(!Indices && "Unexpected out of bound");
return CurIndex;
}
// Given an array type, recursively traverse the elements.
else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
- if (Indices && *Indices == i)
- return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex);
+ unsigned NumElts = ATy->getNumElements();
+ // Compute the linear offset added when stepping over one element of the array
+ unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
+ if (Indices) {
+ assert(*Indices < NumElts && "Unexpected out of bound");
+ // If the index is inside the array, compute the linear index of the
+ // requested element and recurse into it with the rest of the index list
+ CurIndex += EltLinearOffset * *Indices;
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
}
+ CurIndex += EltLinearOffset*NumElts;
return CurIndex;
}
// We haven't found the type we're looking for, so keep searching.
@@ -512,8 +518,9 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
return false;
}
+ const Function *F = ExitBB->getParent();
return returnTypeIsEligibleForTailCall(
- ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering());
+ F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
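
The reworked array path in ComputeLinearIndex computes the per-element stride once and multiplies, rather than recursing once per skipped element. A standalone sketch of the same arithmetic (a toy helper, not the LLVM API; type and numbers hypothetical):

    #include <cassert>
    #include <cstdio>

    // For { i32, [3 x { i32, i32 }] } each array element flattens to two
    // scalar slots, so the stride is 2. Reaching indices {1, 2} costs
    // 1 (skip the leading i32) + 2 * 2 (jump two whole elements) = 5.
    unsigned linearIndexIntoArray(unsigned Stride, unsigned NumElts,
                                  unsigned Index, unsigned CurIndex) {
      assert(Index < NumElts && "index out of bounds");
      return CurIndex + Stride * Index; // one multiply, no per-element walk
    }

    int main() {
      std::printf("%u\n", linearIndexIntoArray(2, 3, 2, 1)); // prints 5
    }
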
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 5cb351d..ec3cd77 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -24,6 +24,7 @@ codegen_SRC_FILES := \
ForwardControlFlowIntegrity.cpp \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
+ GCRootLowering.cpp \
GCStrategy.cpp \
GlobalMerge.cpp \
IfConversion.cpp \
@@ -95,6 +96,7 @@ codegen_SRC_FILES := \
ScheduleDAGPrinter.cpp \
ScoreboardHazardRecognizer.cpp \
ShadowStackGC.cpp \
+ ShadowStackGCLowering.cpp \
SjLjEHPrepare.cpp \
SlotIndexes.cpp \
SpillPlacement.cpp \
@@ -104,6 +106,7 @@ codegen_SRC_FILES := \
StackMaps.cpp \
StackProtector.cpp \
StackSlotColoring.cpp \
+ StatepointExampleGC.cpp \
TailDuplication.cpp \
TargetFrameLoweringImpl.cpp \
TargetInstrInfo.cpp \
@@ -114,7 +117,8 @@ codegen_SRC_FILES := \
TargetSchedule.cpp \
TwoAddressInstructionPass.cpp \
UnreachableBlockElim.cpp \
- VirtRegMap.cpp
+ VirtRegMap.cpp \
+ WinEHPrepare.cpp
# For the host
# =====================================================
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 66c6c63..6fe75ad 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -88,8 +88,7 @@ void ARMException::endFunction(const MachineFunction *) {
Asm->getFunctionNumber()));
if (!MMI->getLandingPads().empty()) {
// Emit references to personality.
- if (const Function * Personality =
- MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ if (const Function *Personality = MMI->getPersonality()) {
MCSymbol *PerSym = Asm->getSymbol(Personality);
Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index cb8e96a..0ce457f 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -13,6 +13,7 @@ codegen_asmprinter_SRC_FILES := \
DwarfCFIException.cpp \
DwarfCompileUnit.cpp \
DwarfDebug.cpp \
+ DwarfExpression.cpp \
DwarfFile.cpp \
DwarfStringPool.cpp \
DwarfUnit.cpp \
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8a32713..988381d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -41,9 +41,11 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -98,15 +100,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
return NumBits;
}
-AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- MII(tm.getSubtargetImpl()->getInstrInfo()),
- OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr),
- LastFn(0), Counter(~0U), SetCounter(0) {
- DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
+ OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()),
+ LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = nullptr;
+ MMI = nullptr;
+ LI = nullptr;
+ MF = nullptr;
CurrentFnSym = CurrentFnSymForSize = nullptr;
GCMetadataPrinters = nullptr;
- VerboseAsm = Streamer.isVerboseAsm();
+ VerboseAsm = OutStreamer.isVerboseAsm();
}
AsmPrinter::~AsmPrinter() {
@@ -129,16 +133,17 @@ unsigned AsmPrinter::getFunctionNumber() const {
}
const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
- return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
+ return *TM.getObjFileLowering();
}
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getSubtargetImpl()->getDataLayout();
+ return *TM.getDataLayout();
}
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
- return TM.getSubtarget<MCSubtargetInfo>();
+ assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
+ return MF->getSubtarget<MCSubtargetInfo>();
}
void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
@@ -175,7 +180,7 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer.InitSections(false);
- Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+ Mang = new Mangler(TM.getDataLayout());
// Emit the version-min deployment target directive if needed.
//
@@ -210,7 +215,7 @@ bool AsmPrinter::doInitialization(Module &M) {
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (auto &I : *MI)
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
- MP->beginAssembly(*this);
+ MP->beginAssembly(M, *MI, *this);
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
@@ -222,12 +227,25 @@ bool AsmPrinter::doInitialization(Module &M) {
}
if (MAI->doesSupportDebugInformation()) {
- if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment())
+ bool skip_dwarf = false;
+ if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
- DD = new DwarfDebug(this, &M);
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+ // FIXME: Don't emit DWARF debug info if there's at least one function
+ // with AddressSanitizer instrumentation.
+ // This is a band-aid fix for PR22032.
+ for (auto &F : M.functions()) {
+ if (F.hasFnAttribute(Attribute::SanitizeAddress)) {
+ skip_dwarf = true;
+ break;
+ }
+ }
+ }
+ if (!skip_dwarf) {
+ DD = new DwarfDebug(this, &M);
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+ }
}
EHStreamer *ES = nullptr;
@@ -241,7 +259,7 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::ARM:
ES = new ARMException(this);
break;
- case ExceptionHandling::ItaniumWinEH:
+ case ExceptionHandling::WinEH:
switch (MAI->getWinEHEncodingType()) {
default: llvm_unreachable("unsupported unwinding information encoding");
case WinEH::EncodingType::Itanium:
@@ -323,6 +341,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (EmitSpecialLLVMGlobal(GV))
return;
+ // Skip the emission of global equivalents. The symbol can be emitted later
+ // on by emitGlobalGOTEquivs in case it turns out to be needed.
+ if (GlobalGOTEquivs.count(getSymbol(GV)))
+ return;
+
if (isVerbose()) {
GV->printAsOperand(OutStreamer.GetCommentOS(),
/*PrintType=*/false, GV->getParent());
@@ -336,12 +359,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (!GV->hasInitializer()) // External globals require no extra code.
return;
+ GVSym->redefineIfPossible();
+ if (GVSym->isDefined() || GVSym->isVariable())
+ report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
+
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -508,6 +536,10 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer.GetCommentOS() << '\n';
}
+ // Emit the prefix data.
+ if (F->hasPrefixData())
+ EmitGlobalConstant(F->getPrefixData());
+
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
EmitFunctionEntryLabel();
@@ -528,27 +560,32 @@ void AsmPrinter::EmitFunctionHeader() {
HI.Handler->beginFunction(MF);
}
- // Emit the prefix data.
- if (F->hasPrefixData())
- EmitGlobalConstant(F->getPrefixData());
+ // Emit the prologue data.
+ if (F->hasPrologueData())
+ EmitGlobalConstant(F->getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
/// function. This can be overridden by targets as required to do custom stuff.
void AsmPrinter::EmitFunctionEntryLabel() {
+ CurrentFnSym->redefineIfPossible();
+
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (CurrentFnSym->isUndefined())
- return OutStreamer.EmitLabel(CurrentFnSym);
+ if (CurrentFnSym->isVariable())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' is a protected alias");
+ if (CurrentFnSym->isDefined())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
- report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
- "' label emitted multiple times to assembly file");
+ return OutStreamer.EmitLabel(CurrentFnSym);
}
/// emitComments - Pretty-print comments for instructions.
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getParent()->getParent();
- const TargetMachine &TM = MF->getTarget();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Check for spills and reloads
int FI;
@@ -558,24 +595,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
- if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI,
- FI)) {
+ if (TII->isLoadFromStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
- } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot(
- &MI, MMO, FI)) {
+ } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
- } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE(
- &MI, FI)) {
+ } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
- } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot(
- &MI, MMO, FI)) {
+ } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -589,9 +622,8 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer.AddComment(
- Twine("implicit-def: ") +
- TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ MMI->getContext().getRegisterInfo()->getName(RegNo));
OutStreamer.AddBlankLine();
}
@@ -601,7 +633,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
Str += ' ';
- Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg());
+ Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg());
Str += (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer.AddComment(Str);
@@ -629,9 +661,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << V.getName();
DIExpression Expr = MI->getDebugExpression();
- if (Expr.isVariablePiece())
- OS << " [piece offset=" << Expr.getPieceOffset()
- << " size=" << Expr.getPieceSize() << "]";
+ if (Expr.isBitPiece())
+ OS << " [bit_piece offset=" << Expr.getBitPieceOffset()
+ << " size=" << Expr.getBitPieceSize() << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
@@ -663,8 +695,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
Reg = MI->getOperand(0).getReg();
} else {
assert(MI->getOperand(0).isFI() && "Unknown operand type");
- const TargetFrameLowering *TFI =
- AP.TM.getSubtargetImpl()->getFrameLowering();
+ const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
Deref = true;
@@ -678,7 +709,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (Deref)
OS << '[';
- OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg);
+ OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg);
}
if (Deref)
@@ -701,8 +732,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->getExceptionHandlingType() == ExceptionHandling::ItaniumWinEH &&
- MF->getFunction()->needsUnwindTableEntry();
+ return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -721,6 +751,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
emitCFIInstruction(CFI);
}
+void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
+ // The operands are the MCSymbol and the frame offset of the allocation.
+ MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol();
+ int FrameOffset = MI.getOperand(1).getImm();
+
+ // Emit a symbol assignment.
+ OutStreamer.EmitAssignment(FrameAllocSym,
+ MCConstantExpr::Create(FrameOffset, OutContext));
+}
+
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
@@ -759,6 +799,10 @@ void AsmPrinter::EmitFunctionBody() {
emitCFIInstruction(MI);
break;
+ case TargetOpcode::FRAME_ALLOC:
+ emitFrameAlloc(MI);
+ break;
+
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol());
@@ -800,7 +844,7 @@ void AsmPrinter::EmitFunctionBody() {
// labels from collapsing together. Just emit a noop.
if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
MCInst Noop;
- TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop);
+ MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop);
OutStreamer.AddComment("avoids zero-length function");
// Targets can opt-out of emitting the noop here by leaving the opcode
@@ -852,13 +896,95 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.AddBlankLine();
}
-static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
+/// \brief Compute the number of Global Variables that use a Constant.
+static unsigned getNumGlobalVariableUses(const Constant *C) {
+ if (!C)
+ return 0;
+
+ if (isa<GlobalVariable>(C))
+ return 1;
+
+ unsigned NumUses = 0;
+ for (auto *CU : C->users())
+ NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
+
+ return NumUses;
+}
+
+/// \brief Only consider global GOT equivalents if at least one user is a
+/// cstexpr inside an initializer of another global variable. Also, don't
+/// handle cstexpr inside instructions. During global variable emission,
+/// candidates are skipped and are emitted later in case at least one cstexpr
+/// isn't replaced by a PC relative GOT entry access.
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
+ unsigned &NumGOTEquivUsers) {
+ // Global GOT equivalents are unnamed private globals with a constant
+ // pointer initializer to another global symbol. They must point to a
+ // GlobalVariable or Function, i.e., a GlobalValue.
+ if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() ||
+ !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ return false;
+
+ // To be a GOT equivalent, at least one of its users needs to be a constant
+ // expression used by another global variable.
+ for (auto *U : GV->users())
+ NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U));
+
+ return NumGOTEquivUsers > 0;
+}
+
+/// \brief An unnamed constant global variable solely containing a pointer to
+/// another global variable is equivalent to a GOT table entry; it contains
+/// the address of another symbol. Optimize it and replace accesses to these
+/// "GOT equivalents" by using the GOT entry for the final global instead.
+/// Compute GOT equivalent candidates among all global variables to avoid
+/// emitting them later on if possible, once each use has been replaced by a
+/// GOT entry access.
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ for (const auto &G : M.globals()) {
+ unsigned NumGOTEquivUsers = 0;
+ if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+ continue;
+
+ const MCSymbol *GOTEquivSym = getSymbol(&G);
+ GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
+ }
+}
+
+/// \brief Constant expressions using GOT equivalent globals may not be
+/// eligible for PC relative GOT entry conversion; in such cases we need to
+/// emit the globals we previously omitted in EmitGlobalVariable.
+void AsmPrinter::emitGlobalGOTEquivs() {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ while (!GlobalGOTEquivs.empty()) {
+ DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I =
+ GlobalGOTEquivs.begin();
+ const MCSymbol *S = I->first;
+ const GlobalVariable *GV = I->second.first;
+ GlobalGOTEquivs.erase(S);
+ EmitGlobalVariable(GV);
+ }
+}
bool AsmPrinter::doFinalization(Module &M) {
+ // Gather all GOT equivalent globals in the module. We really need two
+ // passes over the globals: one to compute the candidates and another to
+ // avoid emitting them in EmitGlobalVariable; otherwise we would not be
+ // able to handle cases where the GOT equivalent shows up before its use.
+ computeGlobalGOTEquivs(M);
+
// Emit global variables.
for (const auto &G : M.globals())
EmitGlobalVariable(&G);
+ // Emit remaining GOT equivalent globals.
+ emitGlobalGOTEquivs();
+
// Emit visibility info for declarations
for (const Function &F : M) {
if (!F.isDeclaration())
@@ -875,10 +1001,15 @@ bool AsmPrinter::doFinalization(Module &M) {
JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
if (JITI && !JITI->getTables().empty()) {
+ // Since we're at the module level we can't use a function specific
+ // MCSubtargetInfo - instead create one with the module defaults.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
unsigned Arch = Triple(getTargetTriple()).getArch();
bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
MCInst TrapInst;
- TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst);
+ TII->getTrap(TrapInst);
unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
// Emit the right section for these functions.
@@ -904,9 +1035,8 @@ bool AsmPrinter::doFinalization(Module &M) {
const MCSymbolRefExpr *TargetSymRef =
MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
OutContext);
- TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch(
- JumpToFun, TargetSymRef);
- OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
+ TII->getUnconditionalBranch(JumpToFun, TargetSymRef);
+ OutStreamer.EmitInstruction(JumpToFun, *STI);
++Count;
}
@@ -914,7 +1044,7 @@ bool AsmPrinter::doFinalization(Module &M) {
uint64_t Remaining = NextPowerOf2(Count) - Count;
for (uint64_t C = 0; C < Remaining; ++C) {
EmitAlignment(LogAlignment);
- OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
+ OutStreamer.EmitInstruction(TrapInst, *STI);
}
}
@@ -974,18 +1104,34 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, Alias.getVisibility());
// Emit the directives as assignments aka .set:
- OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee(), *this));
+ OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I))
- MP->finishAssembly(*this);
+ MP->finishAssembly(M, *MI, *this);
// Emit llvm.ident metadata in an '.ident' directive.
EmitModuleIdents(M);
+ // Emit __morestack address if needed for indirect calls.
+ if (MMI->usesMorestackAddr()) {
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
+ /*C=*/nullptr);
+ OutStreamer.SwitchSection(ReadOnlySection);
+
+ MCSymbol *AddrSymbol =
+ OutContext.GetOrCreateSymbol(StringRef("__morestack_addr"));
+ OutStreamer.EmitLabel(AddrSymbol);
+
+ unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+ PtrSize);
+ }
+
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -1044,7 +1190,7 @@ void AsmPrinter::EmitConstantPool() {
unsigned Align = CPE.getAlignment();
SectionKind Kind =
- CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+ CPE.getSectionKind(TM.getDataLayout());
const Constant *C = nullptr;
if (!CPE.isMachineConstantPoolEntry())
@@ -1098,7 +1244,7 @@ void AsmPrinter::EmitConstantPool() {
Type *Ty = CPE.getType();
Offset = NewOffset +
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty);
+ TM.getDataLayout()->getTypeAllocSize(Ty);
OutStreamer.EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
@@ -1113,7 +1259,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getSubtarget().getDataLayout();
+ const DataLayout *DL = MF->getTarget().getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1123,29 +1269,21 @@ void AsmPrinter::EmitJumpTableInfo() {
// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
const Function *F = MF->getFunction();
- bool JTInDiffSection = false;
- if (// In PIC mode, we need to emit the jump table to the same section as the
- // function body itself, otherwise the label differences won't make sense.
- // FIXME: Need a better predicate for this: what about custom entries?
- MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
- // We should also do if the section name is NULL or function is declared
- // in discardable section
- // FIXME: this isn't the right predicate, should be based on the MCSection
- // for the function.
- F->isWeakForLinker()) {
- OutStreamer.SwitchSection(
- getObjFileLowering().SectionForGlobal(F, *Mang, TM));
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ *F);
+ if (!JTInDiffSection) {
+ OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM));
} else {
// Otherwise, drop it in the readonly section.
const MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
- /*C=*/nullptr);
+ TLOF.getSectionForJumpTable(*F, *Mang, TM);
OutStreamer.SwitchSection(ReadOnlySection);
- JTInDiffSection = true;
}
EmitAlignment(Log2_32(
- MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout())));
+ MJTI->getEntryAlignment(*TM.getDataLayout())));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -1163,7 +1301,7 @@ void AsmPrinter::EmitJumpTableInfo() {
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
MAI->doesSetDirectiveSuppressesReloc()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
const MachineBasicBlock *MBB = JTBBs[ii];
@@ -1207,9 +1345,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
- Value =
- TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry(
- MJTI, MBB, UID, OutContext);
+ Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
break;
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -1248,7 +1385,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
break;
}
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
break;
@@ -1258,7 +1395,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
unsigned EntrySize =
- MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout());
+ MJTI->getEntrySize(*TM.getDataLayout());
OutStreamer.EmitValue(Value, EntrySize);
}
@@ -1368,7 +1505,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
@@ -1483,25 +1620,26 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(),
+ NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(),
NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+ assert(NumBits <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
if (getCurrentSection()->getKind().isText())
- OutStreamer.EmitCodeAlignment(1 << NumBits);
+ OutStreamer.EmitCodeAlignment(1u << NumBits);
else
- OutStreamer.EmitValueToAlignment(1 << NumBits);
+ OutStreamer.EmitValueToAlignment(1u << NumBits);
}
//===----------------------------------------------------------------------===//
// Constant emission.
//===----------------------------------------------------------------------===//
-/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
- MCContext &Ctx = AP.OutContext;
+const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
+ MCContext &Ctx = OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
return MCConstantExpr::Create(0, Ctx);
@@ -1510,19 +1648,18 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
+ return MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+ return MCSymbolRefExpr::Create(GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
}
- if (const MCExpr *RelocExpr =
- AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang,
- AP.TM))
+ if (const MCExpr *RelocExpr
+ = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM))
return RelocExpr;
switch (CE->getOpcode()) {
@@ -1531,9 +1668,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
if (Constant *C = ConstantFoldConstantExpression(
- CE, AP.TM.getSubtargetImpl()->getDataLayout()))
+ CE, TM.getDataLayout()))
if (C != CE)
- return lowerConstant(C, AP);
+ return lowerConstant(C);
// Otherwise report the problem to the user.
{
@@ -1541,16 +1678,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
+ !MF ? nullptr : MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
- const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
+ const MCExpr *Base = lowerConstant(CE->getOperand(0));
if (!OffsetAI)
return Base;
@@ -1566,26 +1704,28 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// is reasonable to treat their delta as a 32-bit value.
// FALL THROUGH.
case Instruction::BitCast:
- return lowerConstant(CE->getOperand(0), AP);
+ return lowerConstant(CE->getOperand(0));
case Instruction::IntToPtr: {
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
false/*ZExt*/);
- return lowerConstant(Op, AP);
+ return lowerConstant(Op);
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
Type *Ty = CE->getType();
- const MCExpr *OpExpr = lowerConstant(Op, AP);
+ const MCExpr *OpExpr = lowerConstant(Op);
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
@@ -1611,8 +1751,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
- const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
@@ -1629,7 +1769,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
}
}
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
+static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+ const Constant *BaseCV = nullptr,
+ uint64_t Offset = 0);
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
@@ -1653,7 +1795,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (CI->getBitWidth() > 64) return -1;
uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType());
+ TM.getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1698,7 +1840,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
uint64_t Bytes =
- AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ AP.TM.getDataLayout()->getTypeAllocSize(
CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
@@ -1749,7 +1891,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1758,20 +1900,22 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
+static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, AP.TM);
+ const DataLayout &DL = *AP.TM.getDataLayout();
if (Value != -1) {
- uint64_t Bytes =
- AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
- CA->getType());
+ uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value);
}
else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- emitGlobalConstantImpl(CA->getOperand(i), AP);
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset);
+ Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ }
}
}
@@ -1779,7 +1923,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1787,24 +1931,25 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
AP.OutStreamer.EmitZeros(Padding);
}
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
+static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = AP.TM.getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
+ // Print the actual field value.
+ emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar);
+
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
- // Now print the actual field value.
- emitGlobalConstantImpl(Field, AP);
-
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
@@ -1839,7 +1984,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() &&
+ if (AP.TM.getDataLayout()->isBigEndian() &&
!CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
@@ -1858,13 +2003,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -1910,7 +2055,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Emit the extra bits after the 64-bits chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(
CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
@@ -1920,9 +2065,100 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
}
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+/// \brief Transform a non-absolute MCExpr containing a reference to a GOT
+/// equivalent global into a target-specific GOT PC-relative access to the
+/// final symbol.
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
+ const Constant *BaseCst,
+ uint64_t Offset) {
+ // The global @foo below illustrates a global that uses a got equivalent.
+ //
+ // @bar = global i32 42
+ // @gotequiv = private unnamed_addr constant i32* @bar
+ // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64),
+ // i64 ptrtoint (i32* @foo to i64))
+ // to i32)
+ //
+ // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually
+ // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the
+ // form:
+ //
+ // foo = cstexpr, where
+ // cstexpr := <gotequiv> - "." + <cst>
+ // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
+ //
+ // After canonicalization by EvaluateAsRelocatable `ME` turns into:
+ //
+ // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ MCValue MV;
+ if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
+ return;
+
+ const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol();
+ if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
+ return;
+
+ const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst);
+ if (!BaseGV)
+ return;
+
+ const MCSymbol *BaseSym = AP.getSymbol(BaseGV);
+ if (BaseSym != &MV.getSymB()->getSymbol())
+ return;
+
+ // Make sure to match:
+ //
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ int64_t GOTPCRelCst = Offset + MV.getConstant();
+ if (GOTPCRelCst < 0)
+ return;
+
+ // Emit the GOT PC relative to replace the got equivalent global, i.e.:
+ //
+ // bar:
+ // .long 42
+ // gotequiv:
+ // .quad bar
+ // foo:
+ // .long gotequiv - "." + <cst>
+ //
+ // is replaced by the target specific equivalent to:
+ //
+ // bar:
+ // .long 42
+ // foo:
+ // .long bar@GOTPCREL+<gotpcrelcst>
+ //
+ AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
+ const GlobalVariable *GV = Result.first;
+ unsigned NumUses = Result.second;
+ const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
+ const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
+ GOTPCRelCst);
+
+ // Update GOT equivalent usage information
+ --NumUses;
+ if (NumUses)
+ AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
+ else
+ AP.GlobalGOTEquivs.erase(GOTEquivSym);
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
+ const DataLayout *DL = AP.TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
+
+ // Globals with sub-elements such as combinations of arrays and structs
+ // are handled recursively by emitGlobalConstantImpl. Keep track of the
+ // constant symbol base and the current position with BaseCV and Offset.
+ if (!BaseCV && CV->hasOneUse())
+ BaseCV = dyn_cast<Constant>(CV->user_back());
+
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
@@ -1955,10 +2191,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
return emitGlobalConstantDataSequential(CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(CVA, AP);
+ return emitGlobalConstantArray(CVA, AP, BaseCV, Offset);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(CVS, AP);
+ return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -1981,13 +2217,21 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
- AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size);
+ const MCExpr *ME = AP.lowerConstant(CV);
+
+ // Since lowerConstant already folded and got rid of all IR pointer and
+ // integer casts, detect GOT equivalent accesses by looking into the MCExpr
+ // directly.
+ if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
+
+ AP.OutStreamer.EmitValue(ME, Size);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
+ TM.getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
@@ -2015,16 +2259,16 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
/// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const {
+ const DataLayout *DL = TM.getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
Name + Twine(ID));
}
/// GetTempSymbol - Return an assembler temporary label with the specified
/// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const {
+ const DataLayout *DL = TM.getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Name);
}
@@ -2040,7 +2284,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
@@ -2054,7 +2298,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
@@ -2252,6 +2496,11 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
+ assert(!S.useStatepoints() && "statepoints do not currently support custom"
+ " stackmap formats, please see the documentation for a description of"
+ " the default format. If you really need a custom serialized format,"
+ " please file a bug");
+
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
gcp_map_type::iterator GCPI = GCMap.find(&S);
if (GCPI != GCMap.end())
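
Among the smaller fixes above, EmitAlignment now shifts an unsigned literal and asserts that NumBits stays below the type's bit width: the old 1 << NumBits is a signed shift, and shifting a signed int into the sign bit is, as the new assert message says, undefined behavior. A minimal sketch of the guarded conversion, mirroring the added assert (helper name hypothetical):

    #include <cassert>
    #include <limits>

    // Convert a log2 alignment to a byte count without signed-shift UB.
    unsigned alignmentBytes(unsigned NumBits) {
      assert(NumBits <
                 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
             "shift amount would be undefined behavior");
      return 1u << NumBits; // unsigned shift, well defined below 'digits'
    }
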
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 05f6a68..d0958c1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,26 +12,44 @@
//===----------------------------------------------------------------------===//
#include "ByteStreamer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/ADT/SmallBitVector.h"
+#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+ BS.EmitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::EmitSigned(int Value) {
+ BS.EmitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) {
+ BS.EmitULEB128(Value, Twine(Value));
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
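These three hooks are the entire backend a DwarfExpression needs; the shared builder lowers every higher-level operation onto them. A minimal sketch of how a register operand reduces to the primitives (simplified; the real lowering lives in DwarfExpression::AddReg):

// Sketch, assuming a DebugLocDwarfExpression 'Expr' and a non-negative
// DWARF register number 'DwarfReg'.
static void emitRegSketch(DebugLocDwarfExpression &Expr, int DwarfReg) {
  if (DwarfReg < 32) {
    Expr.EmitOp(dwarf::DW_OP_reg0 + DwarfReg); // compact one-byte form
  } else {
    Expr.EmitOp(dwarf::DW_OP_regx);            // opcode ...
    Expr.EmitUnsigned(DwarfReg);               // ... plus ULEB128 operand
  }
}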
+
//===----------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
@@ -131,7 +149,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ return TM.getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -187,57 +205,6 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
EmitLabelDifference(Label, SectionLabel, 4);
}
-/// Emit a dwarf register operation.
-static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) {
- assert(Reg >= 0);
- if (Reg < 32) {
- Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg,
- dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
- } else {
- Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx");
- Streamer.EmitULEB128(Reg, Twine(Reg));
- }
-}
-
-/// Emit an (double-)indirect dwarf register operation.
-static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset,
- bool Deref) {
- assert(Reg >= 0);
- if (Reg < 32) {
- Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg,
- dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
- } else {
- Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx");
- Streamer.EmitULEB128(Reg, Twine(Reg));
- }
- Streamer.EmitSLEB128(Offset);
- if (Deref)
- Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
-}
-
-void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
- unsigned OffsetInBits) const {
- assert(SizeInBits > 0 && "piece has size zero");
- const unsigned SizeOfByte = 8;
- if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
- Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece");
- Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits));
- Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits));
- } else {
- Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece");
- unsigned ByteSize = SizeInBits / SizeOfByte;
- Streamer.EmitULEB128(ByteSize, Twine(ByteSize));
- }
-}
-
-/// Emit a shift-right dwarf expression.
-static void emitDwarfOpShr(ByteStreamer &Streamer,
- unsigned ShiftBy) {
- Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
- Streamer.EmitULEB128(ShiftBy);
- Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr");
-}
-
// Some targets do not provide a DWARF register number for every
// register. This function attempts to emit a DWARF register by
// emitting a piece of a super-register or by piecing together
@@ -247,112 +214,44 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) const {
assert(MLoc.isReg() && "MLoc must be a register");
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
-
- // If this is a valid register number, emit it.
- if (Reg >= 0) {
- emitDwarfRegOp(Streamer, Reg);
- EmitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
- return;
- }
-
- // Walk up the super-register chain until we find a valid number.
- // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
- for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) {
- Reg = TRI->getDwarfRegNum(*SR, false);
- if (Reg >= 0) {
- unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg());
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
- OutStreamer.AddComment("super-register");
- emitDwarfRegOp(Streamer, Reg);
- if (PieceOffsetInBits == RegOffset) {
- EmitDwarfOpPiece(Streamer, Size, RegOffset);
- } else {
- // If this is part of a variable in a sub-register at a
- // non-zero offset, we need to manually shift the value into
- // place, since the DW_OP_piece describes the part of the
- // variable, not the position of the subregister.
- if (RegOffset)
- emitDwarfOpShr(Streamer, RegOffset);
- EmitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
- }
- return;
- }
- }
-
- // Otherwise, attempt to find a covering set of sub-register numbers.
- // For example, Q0 on ARM is a composition of D0+D1.
- //
- // Keep track of the current position so we can emit the more
- // efficient DW_OP_piece.
- unsigned CurPos = PieceOffsetInBits;
- // The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8;
- // Keep track of the bits in the register we already emitted, so we
- // can avoid emitting redundant aliasing subregs.
- SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
- Reg = TRI->getDwarfRegNum(*SR, false);
-
- // Intersection between the bits we already emitted and the bits
- // covered by this subregister.
- SmallBitVector Intersection(RegSize, false);
- Intersection.set(Offset, Offset + Size);
- Intersection ^= Coverage;
-
- // If this sub-register has a DWARF number and we haven't covered
- // its range, emit a DWARF piece for it.
- if (Reg >= 0 && Intersection.any()) {
- OutStreamer.AddComment("sub-register");
- emitDwarfRegOp(Streamer, Reg);
- EmitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
- CurPos = Offset + Size;
-
- // Mark it as emitted.
- Coverage.set(Offset, Offset + Size);
- }
- }
+ DebugLocDwarfExpression Expr(*this, Streamer);
+ Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits);
+}
- if (CurPos == PieceOffsetInBits) {
- // FIXME: We have no reasonable way of handling errors in here.
- Streamer.EmitInt8(dwarf::DW_OP_nop,
- "nop (could not find a dwarf register number)");
- }
+void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer,
+ unsigned PieceSizeInBits,
+ unsigned PieceOffsetInBits) const {
+ DebugLocDwarfExpression Expr(*this, Streamer);
+ Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
}
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
- const MachineLocation &MLoc,
- bool Indirect) const {
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+ const MachineLocation &MLoc) const {
+ DebugLocDwarfExpression Expr(*this, Streamer);
+ const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
+ int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
// We assume that pointers are always in an addressable register.
- if (Indirect || MLoc.isIndirect()) {
+ if (MLoc.isIndirect())
// FIXME: We have no reasonable way of handling errors in here. The
// caller might be in the middle of a dwarf expression. We should
// probably assert that Reg >= 0 once debug info generation is more
// mature.
- Streamer.EmitInt8(dwarf::DW_OP_nop,
- "nop (invalid dwarf register number for indirect loc)");
- return;
- }
+ return Expr.EmitOp(dwarf::DW_OP_nop,
+ "nop (could not find a dwarf register number)");
// Attempt to find a valid super- or sub-register.
- return EmitDwarfRegOpPiece(Streamer, MLoc);
+ if (!Expr.AddMachineRegPiece(MLoc.getReg()))
+ Expr.EmitOp(dwarf::DW_OP_nop,
+ "nop (could not find a dwarf register number)");
+ return;
}
if (MLoc.isIndirect())
- emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect);
- else if (Indirect)
- emitDwarfRegOpIndirect(Streamer, Reg, 0, false);
+ Expr.AddRegIndirect(Reg, MLoc.getOffset());
else
- emitDwarfRegOp(Streamer, Reg);
+ Expr.AddReg(Reg);
}
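The rewritten routine produces the same byte sequences the deleted helpers did. Concretely, on x86-64, where RBP is DWARF register 6:

// MLoc = (RBP, offset -8), indirect location:
//   0x76   DW_OP_breg6
//   0x78   SLEB128(-8)
// MLoc = (RBP), value lives in the register itself:
//   0x56   DW_OP_reg6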
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index cca5f22..e6e7c97 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -64,7 +65,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
if (LocInfo->getNumOperands() != 0)
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine)))
LocCookie = CI->getZExtValue();
}
@@ -90,8 +91,19 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
assert(MCAI && "No MCAsmInfo");
if (!MCAI->useIntegratedAssembler() &&
!OutStreamer.isIntegratedAssemblerRequired()) {
+ emitInlineAsmStart();
OutStreamer.EmitRawText(Str);
- emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr);
+ // If we have a machine function then grab the MCSubtarget off of that,
+ // otherwise we're at the module level and want to construct one from
+ // the default CPU and target triple.
+ if (MF) {
+ emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr);
+ } else {
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ emitInlineAsmEnd(*STI, nullptr);
+ }
return;
}
@@ -137,11 +149,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// emitInlineAsmEnd().
MCSubtargetInfo STIOrig = *STI;
- MCTargetOptions MCOptions;
- if (MF)
- MCOptions = MF->getTarget().Options.MCOptions;
- std::unique_ptr<MCTargetAsmParser> TAP(
- TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions));
+ // We create a new MCInstrInfo here since we might be at the module level
+ // and not have a MachineFunction to initialize the TargetInstrInfo from and
+ // we only need MCInstrInfo for asm parsing. We create one unconditionally
+ // because it's not subtarget dependent.
+ std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
+ *STI, *Parser, *MII, TM.Options.MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -152,6 +166,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
}
+ emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
@@ -467,7 +482,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (MI->getOperand(i-1).isMetadata() &&
(LocMD = MI->getOperand(i-1).getMetadata()) &&
LocMD->getNumOperands() != 0) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
LocCookie = CI->getZExtValue();
break;
}
@@ -505,7 +521,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
if (!strcmp(Code, "private")) {
OS << DL->getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
@@ -566,5 +582,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return true;
}
+void AsmPrinter::emitInlineAsmStart() const {}
+
void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const {}
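The empty defaults let targets bracket user assembly with state fixups (ARM, for example, overrides emitInlineAsmEnd to restore the assembler's mode). A hedged sketch of such an override for a hypothetical target; only the hook signatures are taken from the code above:

class FooAsmPrinter : public AsmPrinter {
public:
  FooAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
      : AsmPrinter(TM, std::move(Streamer)) {}

  void emitInlineAsmStart() const override {
    // Put the assembler into a known mode before user asm runs, e.g. by
    // emitting a mode or syntax directive.
  }
  void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
                        const MCSubtargetInfo *EndInfo) const override {
    // StartInfo is the subtarget state before the asm; EndInfo (possibly
    // null) is the state after it. Restore anything the asm changed.
  }
};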
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 0cc8353..42be114 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -15,10 +15,10 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+#include "DIEHash.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
-#include "DIEHash.h"
namespace llvm {
class ByteStreamer {
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index e6b7d64..01d2c72 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMAsmPrinter
DwarfCFIException.cpp
DwarfCompileUnit.cpp
DwarfDebug.cpp
+ DwarfExpression.cpp
DwarfFile.cpp
DwarfStringPool.cpp
DwarfUnit.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 50ea369..64ba56b 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -11,8 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DIE.h"
-
+#include "llvm/CodeGen/DIE.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
deleted file mode 100644
index e310aef..0000000
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ /dev/null
@@ -1,587 +0,0 @@
-//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Data structures for DWARF info entries.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
-
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Dwarf.h"
-#include <vector>
-
-namespace llvm {
-class AsmPrinter;
-class MCExpr;
-class MCSymbol;
-class raw_ostream;
-class DwarfTypeUnit;
-
-//===--------------------------------------------------------------------===//
-/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
-/// Dwarf abbreviation.
-class DIEAbbrevData {
- /// Attribute - Dwarf attribute code.
- ///
- dwarf::Attribute Attribute;
-
- /// Form - Dwarf form code.
- ///
- dwarf::Form Form;
-
-public:
- DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
-
- // Accessors.
- dwarf::Attribute getAttribute() const { return Attribute; }
- dwarf::Form getForm() const { return Form; }
-
- /// Profile - Used to gather unique data for the abbreviation folding set.
- ///
- void Profile(FoldingSetNodeID &ID) const;
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
-/// information object.
-class DIEAbbrev : public FoldingSetNode {
- /// Unique number for node.
- ///
- unsigned Number;
-
- /// Tag - Dwarf tag code.
- ///
- dwarf::Tag Tag;
-
- /// Children - Whether or not this node has children.
- ///
- // This cheats a bit in all of the uses since the values in the standard
- // are 0 and 1 for no children and children respectively.
- bool Children;
-
- /// Data - Raw data bytes for abbreviation.
- ///
- SmallVector<DIEAbbrevData, 12> Data;
-
-public:
- DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {}
-
- // Accessors.
- dwarf::Tag getTag() const { return Tag; }
- unsigned getNumber() const { return Number; }
- bool hasChildren() const { return Children; }
- const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
- void setChildrenFlag(bool hasChild) { Children = hasChild; }
- void setNumber(unsigned N) { Number = N; }
-
- /// AddAttribute - Adds another set of attribute information to the
- /// abbreviation.
- void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
- Data.push_back(DIEAbbrevData(Attribute, Form));
- }
-
- /// Profile - Used to gather unique data for the abbreviation folding set.
- ///
- void Profile(FoldingSetNodeID &ID) const;
-
- /// Emit - Print the abbreviation using the specified asm printer.
- ///
- void Emit(AsmPrinter *AP) const;
-
-#ifndef NDEBUG
- void print(raw_ostream &O);
- void dump();
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIE - A structured debug information entry. Has an abbreviation which
-/// describes its organization.
-class DIEValue;
-
-class DIE {
-protected:
- /// Offset - Offset in debug info section.
- ///
- unsigned Offset;
-
- /// Size - Size of instance + children.
- ///
- unsigned Size;
-
- /// Abbrev - Buffer for constructing abbreviation.
- ///
- DIEAbbrev Abbrev;
-
- /// Children DIEs.
- ///
- // This can't be a vector<DIE> because pointer validity is required for the
- // Parent pointer and DIEEntry.
- // It can't be a list<DIE> because some clients need pointer validity before
- // the object has been added to any child list
- // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
- // be more convoluted than beneficial.
- std::vector<std::unique_ptr<DIE>> Children;
-
- DIE *Parent;
-
- /// Attribute values.
- ///
- SmallVector<DIEValue *, 12> Values;
-
-protected:
- DIE()
- : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no),
- Parent(nullptr) {}
-
-public:
- explicit DIE(dwarf::Tag Tag)
- : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
- Parent(nullptr) {}
-
- // Accessors.
- DIEAbbrev &getAbbrev() { return Abbrev; }
- const DIEAbbrev &getAbbrev() const { return Abbrev; }
- unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
- dwarf::Tag getTag() const { return Abbrev.getTag(); }
- unsigned getOffset() const { return Offset; }
- unsigned getSize() const { return Size; }
- const std::vector<std::unique_ptr<DIE>> &getChildren() const {
- return Children;
- }
- const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
- DIE *getParent() const { return Parent; }
- /// Climb up the parent chain to get the compile or type unit DIE this DIE
- /// belongs to.
- const DIE *getUnit() const;
- /// Similar to getUnit, returns null when DIE is not added to an
- /// owner yet.
- const DIE *getUnitOrNull() const;
- void setOffset(unsigned O) { Offset = O; }
- void setSize(unsigned S) { Size = S; }
-
- /// addValue - Add a value and attributes to a DIE.
- ///
- void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) {
- Abbrev.AddAttribute(Attribute, Form);
- Values.push_back(Value);
- }
-
- /// addChild - Add a child to the DIE.
- ///
- void addChild(std::unique_ptr<DIE> Child) {
- assert(!Child->getParent());
- Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
- Child->Parent = this;
- Children.push_back(std::move(Child));
- }
-
- /// findAttribute - Find a value in the DIE with the attribute given,
- /// returns NULL if no such attribute exists.
- DIEValue *findAttribute(dwarf::Attribute Attribute) const;
-
-#ifndef NDEBUG
- void print(raw_ostream &O, unsigned IndentCount = 0) const;
- void dump();
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEValue - A debug information entry value. Some of these roughly correlate
-/// to DWARF attribute classes.
-///
-class DIEValue {
- virtual void anchor();
-
-public:
- enum Type {
- isInteger,
- isString,
- isExpr,
- isLabel,
- isDelta,
- isEntry,
- isTypeSignature,
- isBlock,
- isLoc,
- isLocList,
- };
-
-protected:
- /// Ty - Type of data stored in the value.
- ///
- Type Ty;
-
- explicit DIEValue(Type T) : Ty(T) {}
- virtual ~DIEValue() {}
-
-public:
- // Accessors
- Type getType() const { return Ty; }
-
- /// EmitValue - Emit value via the Dwarf writer.
- ///
- virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
-
- /// SizeOf - Return the size of a value in bytes.
- ///
- virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
-
-#ifndef NDEBUG
- virtual void print(raw_ostream &O) const = 0;
- void dump() const;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEInteger - An integer value DIE.
-///
-class DIEInteger : public DIEValue {
- uint64_t Integer;
-
-public:
- explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
-
- /// BestForm - Choose the best form for integer.
- ///
- static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
- if (IsSigned) {
- const int64_t SignedInt = Int;
- if ((char)Int == SignedInt)
- return dwarf::DW_FORM_data1;
- if ((short)Int == SignedInt)
- return dwarf::DW_FORM_data2;
- if ((int)Int == SignedInt)
- return dwarf::DW_FORM_data4;
- } else {
- if ((unsigned char)Int == Int)
- return dwarf::DW_FORM_data1;
- if ((unsigned short)Int == Int)
- return dwarf::DW_FORM_data2;
- if ((unsigned int)Int == Int)
- return dwarf::DW_FORM_data4;
- }
- return dwarf::DW_FORM_data8;
- }
-
- /// EmitValue - Emit integer of appropriate size.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- uint64_t getValue() const { return Integer; }
-
- /// SizeOf - Determine size of integer value in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEExpr - An expression DIE.
-//
-class DIEExpr : public DIEValue {
- const MCExpr *Expr;
-
-public:
- explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
-
- /// EmitValue - Emit expression value.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// getValue - Get MCExpr.
- ///
- const MCExpr *getValue() const { return Expr; }
-
- /// SizeOf - Determine size of expression value in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIELabel - A label DIE.
-//
-class DIELabel : public DIEValue {
- const MCSymbol *Label;
-
-public:
- explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
-
- /// EmitValue - Emit label value.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// getValue - Get MCSymbol.
- ///
- const MCSymbol *getValue() const { return Label; }
-
- /// SizeOf - Determine size of label value in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEDelta - A simple label difference DIE.
-///
-class DIEDelta : public DIEValue {
- const MCSymbol *LabelHi;
- const MCSymbol *LabelLo;
-
-public:
- DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
- : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
-
- /// EmitValue - Emit delta value.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of delta value in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEString - A container for string values.
-///
-class DIEString : public DIEValue {
- const DIEValue *Access;
- StringRef Str;
-
-public:
- DIEString(const DIEValue *Acc, StringRef S)
- : DIEValue(isString), Access(Acc), Str(S) {}
-
- /// getString - Grab the string out of the object.
- StringRef getString() const { return Str; }
-
- /// EmitValue - Emit delta value.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of delta value in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *D) { return D->getType() == isString; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEEntry - A pointer to another debug information entry. An instance of
-/// this class can also be used as a proxy for a debug information entry not
-/// yet defined (ie. types.)
-class DIEEntry : public DIEValue {
- DIE &Entry;
-
-public:
- explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) {
- }
-
- DIE &getEntry() const { return Entry; }
-
- /// EmitValue - Emit debug information entry offset.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of debug information entry in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
- return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
- : sizeof(int32_t);
- }
-
- /// Returns size of a ref_addr entry.
- static unsigned getRefAddrSize(AsmPrinter *AP);
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// \brief A signature reference to a type unit.
-class DIETypeSignature : public DIEValue {
- const DwarfTypeUnit &Unit;
-
-public:
- explicit DIETypeSignature(const DwarfTypeUnit &Unit)
- : DIEValue(isTypeSignature), Unit(Unit) {}
-
- /// \brief Emit type unit signature.
- void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override;
-
- /// Returns size of a ref_sig8 entry.
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
- assert(Form == dwarf::DW_FORM_ref_sig8);
- return 8;
- }
-
- // \brief Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) {
- return E->getType() == isTypeSignature;
- }
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
- void dump() const;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIELoc - Represents an expression location.
-//
-class DIELoc : public DIEValue, public DIE {
- mutable unsigned Size; // Size in bytes excluding size header.
-public:
- DIELoc() : DIEValue(isLoc), Size(0) {}
-
- /// ComputeSize - Calculate the size of the location expression.
- ///
- unsigned ComputeSize(AsmPrinter *AP) const;
-
- /// BestForm - Choose the best form for data.
- ///
- dwarf::Form BestForm(unsigned DwarfVersion) const {
- if (DwarfVersion > 3)
- return dwarf::DW_FORM_exprloc;
- // Pre-DWARF4 location expressions were blocks and not exprloc.
- if ((unsigned char)Size == Size)
- return dwarf::DW_FORM_block1;
- if ((unsigned short)Size == Size)
- return dwarf::DW_FORM_block2;
- if ((unsigned int)Size == Size)
- return dwarf::DW_FORM_block4;
- return dwarf::DW_FORM_block;
- }
-
- /// EmitValue - Emit location data.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of location data in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isLoc; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEBlock - Represents a block of values.
-//
-class DIEBlock : public DIEValue, public DIE {
- mutable unsigned Size; // Size in bytes excluding size header.
-public:
- DIEBlock() : DIEValue(isBlock), Size(0) {}
-
- /// ComputeSize - Calculate the size of the location expression.
- ///
- unsigned ComputeSize(AsmPrinter *AP) const;
-
- /// BestForm - Choose the best form for data.
- ///
- dwarf::Form BestForm() const {
- if ((unsigned char)Size == Size)
- return dwarf::DW_FORM_block1;
- if ((unsigned short)Size == Size)
- return dwarf::DW_FORM_block2;
- if ((unsigned int)Size == Size)
- return dwarf::DW_FORM_block4;
- return dwarf::DW_FORM_block;
- }
-
- /// EmitValue - Emit location data.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of location data in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIELocList - Represents a pointer to a location list in the debug_loc
-/// section.
-//
-class DIELocList : public DIEValue {
- // Index into the .debug_loc vector.
- size_t Index;
-
-public:
- DIELocList(size_t I) : DIEValue(isLocList), Index(I) {}
-
- /// getValue - Grab the current index out.
- size_t getValue() const { return Index; }
-
- /// EmitValue - Emit location data.
- ///
- void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
-
- /// SizeOf - Determine size of location data in bytes.
- ///
- unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isLocList; }
-
-#ifndef NDEBUG
- void print(raw_ostream &O) const override;
-#endif
-};
-
-} // end llvm namespace
-
-#endif
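The header moves verbatim to include/llvm/CodeGen/DIE.h so code outside AsmPrinter can build DIEs; its contents are unchanged. For reference, DIEInteger::BestForm simply picks the narrowest fixed-size data form the constant fits in:

// Worked examples (values chosen for illustration):
//   BestForm(/*IsSigned=*/false, 200)        -> DW_FORM_data1
//   BestForm(/*IsSigned=*/false, 300)        -> DW_FORM_data2
//   BestForm(/*IsSigned=*/true,  -200)       -> DW_FORM_data2 (outside
//                                               signed-char range)
//   BestForm(/*IsSigned=*/false, 1ULL << 40) -> DW_FORM_data8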
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index b2a3ba8..1e2ba2c 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -13,11 +13,11 @@
#include "ByteStreamer.h"
#include "DIEHash.h"
-#include "DIE.h"
#include "DwarfDebug.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Endian.h"
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 872aa0e..ac014b7 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
-#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/MD5.h"
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6cca985..6d55c03 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -11,8 +11,8 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
namespace llvm {
class MDNode;
@@ -74,7 +74,7 @@ public:
MachineLocation getLoc() const { return Loc; }
const MDNode *getVariableNode() const { return Variable; }
DIVariable getVariable() const { return DIVariable(Variable); }
- bool isVariablePiece() const { return getExpression().isVariablePiece(); }
+ bool isBitPiece() const { return getExpression().isBitPiece(); }
DIExpression getExpression() const { return DIExpression(Expression); }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
@@ -101,8 +101,8 @@ public:
DIVariable Var(Values[0].Variable);
DIExpression NextExpr(Next.Values[0].Expression);
DIVariable NextVar(Next.Values[0].Variable);
- if (Var == NextVar && Expr.isVariablePiece() &&
- NextExpr.isVariablePiece()) {
+ if (Var == NextVar && Expr.isBitPiece() &&
+ NextExpr.isBitPiece()) {
addValues(Next.Values);
End = Next.End;
return true;
@@ -131,7 +131,7 @@ public:
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
- return V.isVariablePiece();
+ return V.isBitPiece();
}) && "value must be a piece");
}
@@ -176,8 +176,8 @@ inline bool operator==(const DebugLocEntry::Value &A,
/// Compare two pieces based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
- return A.getExpression().getPieceOffset() <
- B.getExpression().getPieceOffset();
+ return A.getExpression().getBitPieceOffset() <
+ B.getExpression().getBitPieceOffset();
}
}
diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h
index 2a4f58f..0f1d2ed 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocList.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocList.h
@@ -10,8 +10,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
-#include "llvm/ADT/SmallVector.h"
#include "DebugLocEntry.h"
+#include "llvm/ADT/SmallVector.h"
namespace llvm {
class DwarfCompileUnit;
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 7e87566..a71f35e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "DwarfAccelTable.h"
-#include "DIE.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 3cdf678..74963da 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
-#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
@@ -215,8 +215,8 @@ private:
#endif
};
- DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
- void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
+ DwarfAccelTable(const DwarfAccelTable &) = delete;
+ void operator=(const DwarfAccelTable &) = delete;
// Internal Functions
void EmitHeader(AsmPrinter *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 0dc52da..f45b24c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -51,7 +51,8 @@ void DwarfCFIException::endModule() {
if (moveTypeModule == AsmPrinter::CFI_M_Debug)
Asm->OutStreamer.EmitCFISections(false, true);
- if (!Asm->MAI->usesItaniumLSDAForExceptions())
+ // SjLj uses this pass and it doesn't need this info.
+ if (!Asm->MAI->usesCFIForEH())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -90,7 +91,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const Function *Per = MMI->getPersonality();
shouldEmitPersonality = hasLandingPads &&
PerEncoding != dwarf::DW_EH_PE_omit && Per;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 2f1b0e5..dcc5fe4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,5 +1,5 @@
#include "DwarfCompileUnit.h"
-
+#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
@@ -10,8 +10,8 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
@@ -103,7 +103,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
assert(GV.isGlobalVariable());
- DIScope GVContext = DD->resolve(GV.getContext());
+ DIScope GVContext = GV.getContext();
DIType GTy = DD->resolve(GV.getType());
// Construct the context before querying for the existence of the DIE in
@@ -122,7 +122,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
} else {
- DeclContext = resolve(GV.getContext());
+ DeclContext = GV.getContext();
// Add name and type.
addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
addType(*VariableDIE, GTy);
@@ -292,10 +292,10 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
- const TargetRegisterInfo *RI =
- Asm->TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ if (RI->isPhysicalRegister(Location.getReg()))
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
}
// Add name to the name table, we do this here because we're guaranteed
@@ -515,15 +515,23 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
}
// .. else use frame index.
- int FI = DV.getFrameIndex();
- if (FI != ~0) {
+ if (DV.getFrameIndex().back() == ~0)
+ return VariableDie;
+
+ auto Expr = DV.getExpression().begin();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ for (auto FI : DV.getFrameIndex()) {
unsigned FrameReg = 0;
- const TargetFrameLowering *TFI =
- Asm->TM.getSubtargetImpl()->getFrameLowering();
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- addVariableAddress(DV, *VariableDie, Location);
+ assert(Expr != DV.getExpression().end() &&
+ "Wrong number of expressions");
+ DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
+ DwarfExpr.AddExpression(Expr->begin(), Expr->end());
+ ++Expr;
}
+ addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
return VariableDie;
}
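The loop generalizes the old single-frame-index case: a variable split across several stack slots now gets one DIELoc concatenating an indirect register op plus the matching expression per slot. Roughly, for a 64-bit variable in two slots (offsets hypothetical):

// Two frame indices, each carrying a 32-bit piece (bit offsets 0 and 32),
// yield a single location of the shape:
//   DW_OP_breg<FP> <off0>  DW_OP_bit_piece 32 0
//   DW_OP_breg<FP> <off1>  DW_OP_bit_piece 32 32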
@@ -694,7 +702,7 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
DIVariable DV(Variables.getElement(vi));
assert(DV.isVariable());
- DbgVariable NewVar(DV, DIExpression(nullptr), DD);
+ DbgVariable NewVar(DV, DIExpression(), DD);
auto VariableDie = constructVariableDIE(NewVar);
applyVariableAttributes(NewVar, *VariableDie);
SPDIE->addChild(std::move(VariableDie));
@@ -736,24 +744,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
else if (DV.isBlockByrefVariable())
addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
else
- addAddress(Die, dwarf::DW_AT_location, Location,
- DV.getVariable().isIndirect());
+ addAddress(Die, dwarf::DW_AT_location, Location);
}
/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location,
- bool Indirect) {
+ const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- if (Location.isReg() && !Indirect)
- addRegisterOpPiece(*Loc, Location.getReg());
- else {
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
- if (Indirect && !Location.isReg()) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- }
- }
+ bool validReg;
+ if (Location.isReg())
+ validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ else
+ validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ if (!validReg)
+ return;
// Now attach the location information to the DIE.
addBlock(Die, Attribute, Loc);
@@ -767,53 +773,21 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- unsigned N = DV.getNumAddrElements();
- unsigned i = 0;
- if (Location.isReg()) {
- if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_plus) {
- assert(!DV.getVariable().isIndirect() &&
- "double indirection not handled");
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
- i = 2;
- } else if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_deref) {
- assert(!DV.getVariable().isIndirect() &&
- "double indirection not handled");
- addRegisterOpPiece(*Loc, Location.getReg(),
- DV.getExpression().getPieceSize(),
- DV.getExpression().getPieceOffset());
- i = 3;
- } else
- addRegisterOpPiece(*Loc, Location.getReg());
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ assert(DV.getExpression().size() == 1);
+ DIExpression Expr = DV.getExpression().back();
+ bool ValidReg;
+ if (Location.getOffset()) {
+ ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(),
+ Location.getOffset());
+ if (ValidReg)
+ DwarfExpr.AddExpression(Expr.begin(), Expr.end());
} else
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
-
- for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == dwarf::DW_OP_plus) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
-
- } else if (Element == dwarf::DW_OP_deref) {
- if (!Location.isReg())
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-
- } else if (Element == dwarf::DW_OP_piece) {
- const unsigned SizeOfByte = 8;
- unsigned PieceOffsetInBits = DV.getAddrElement(++i) * SizeOfByte;
- unsigned PieceSizeInBits = DV.getAddrElement(++i) * SizeOfByte;
- // Emit DW_OP_bit_piece Size Offset.
- assert(PieceSizeInBits > 0 && "piece has zero size");
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
- addUInt(*Loc, dwarf::DW_FORM_udata, PieceSizeInBits);
- addUInt(*Loc, dwarf::DW_FORM_udata, PieceOffsetInBits);
- } else
- llvm_unreachable("unknown DIBuilder Opcode");
- }
+ ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg());
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ if (ValidReg)
+ addBlock(Die, Attribute, Loc);
}
/// Add a Dwarf loclistptr attribute data and value.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index e521f39..c66af65 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,9 +15,9 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfUnit.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Dwarf.h"
namespace llvm {
@@ -213,7 +213,7 @@ public:
MachineLocation Location);
/// Add an address attribute to a die based on the location provided.
void addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect = false);
+ const MachineLocation &Location);
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable (navigating the
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 230ea46..aa1f79f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "DwarfDebug.h"
-
#include "ByteStreamer.h"
-#include "DwarfCompileUnit.h"
-#include "DIE.h"
#include "DIEHash.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfExpression.h"
#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
@@ -490,9 +490,6 @@ void DwarfDebug::beginModule() {
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
- // Prime section data.
- SectionMap[Asm->getObjFileLowering().getTextSection()];
}
void DwarfDebug::finishVariableDefinitions() {
@@ -608,53 +605,6 @@ void DwarfDebug::finalizeModuleInfo() {
SkeletonHolder.computeSizeAndOffsets();
}
-void DwarfDebug::endSections() {
- // Filter labels by section.
- for (const SymbolCU &SCU : ArangeLabels) {
- if (SCU.Sym->isInSection()) {
- // Make a note of this symbol and its section.
- const MCSection *Section = &SCU.Sym->getSection();
- if (!Section->getKind().isMetadata())
- SectionMap[Section].push_back(SCU);
- } else {
- // Some symbols (e.g. common/bss on mach-o) can have no section but still
- // appear in the output. This sucks as we rely on sections to build
- // arange spans. We can do it without, but it's icky.
- SectionMap[nullptr].push_back(SCU);
- }
- }
-
- // Build a list of sections used.
- std::vector<const MCSection *> Sections;
- for (const auto &it : SectionMap) {
- const MCSection *Section = it.first;
- Sections.push_back(Section);
- }
-
- // Sort the sections into order.
- // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort);
-
- // Add terminating symbols for each section.
- for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
- const MCSection *Section = Sections[ID];
- MCSymbol *Sym = nullptr;
-
- if (Section) {
- // We can't call MCSection::getLabelEndName, as it's only safe to do so
- // if we know the section name up-front. For user-created sections, the
- // resulting label may not be valid to use as a label. (section names can
- // use a greater set of characters on some systems)
- Sym = Asm->GetTempSymbol("debug_end", ID);
- Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Sym);
- }
-
- // Insert a final terminator.
- SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
- }
-}
-
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
assert(CurFn == nullptr);
@@ -666,10 +616,6 @@ void DwarfDebug::endModule() {
if (!DwarfInfoSectionSym)
return;
- // End any existing sections.
- // TODO: Does this need to happen?
- endSections();
-
// Finalize the debug info for the module.
finalizeModuleInfo();
@@ -783,10 +729,9 @@ void DwarfDebug::collectVariableInfoFromMMITable(
DIVariable DV(VI.Var);
DIExpression Expr(VI.Expr);
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this));
- DbgVariable *RegVar = ConcreteVariables.back().get();
- RegVar->setFrameIndex(VI.Slot);
- InfoHolder.addScopeVariable(Scope, RegVar);
+ auto RegVar = make_unique<DbgVariable>(DV, Expr, this, VI.Slot);
+ if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+ ConcreteVariables.push_back(std::move(RegVar));
}
}
@@ -818,12 +763,12 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
/// Determine whether two variable pieces overlap.
static bool piecesOverlap(DIExpression P1, DIExpression P2) {
- if (!P1.isVariablePiece() || !P2.isVariablePiece())
+ if (!P1.isBitPiece() || !P2.isBitPiece())
return true;
- unsigned l1 = P1.getPieceOffset();
- unsigned l2 = P2.getPieceOffset();
- unsigned r1 = l1 + P1.getPieceSize();
- unsigned r2 = l2 + P2.getPieceSize();
+ unsigned l1 = P1.getBitPieceOffset();
+ unsigned l2 = P2.getBitPieceOffset();
+ unsigned r1 = l1 + P1.getBitPieceSize();
+ unsigned r2 = l2 + P2.getBitPieceSize();
// True where [l1,r1[ and [l2,r2[ overlap.
return (l1 < r2) && (l2 < r1);
}
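Two worked examples with the new bit-granular offsets:

// [0, 32) vs. [32, 64): (0 < 64) && (32 < 32) == false -- adjacent
// pieces do not overlap.
// [0, 48) vs. [32, 64): (0 < 64) && (32 < 48) == true  -- they share
// bits [32, 48).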
@@ -842,7 +787,8 @@ static bool piecesOverlap(DIExpression P1, DIExpression P2) {
// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
// 2 | | ...
// 3 | [clobber reg0]
-// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+// x.
//
// Output:
//
@@ -894,7 +840,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
bool couldMerge = false;
// If this is a piece, it may belong to the current DebugLocEntry.
- if (DIExpr.isVariablePiece()) {
+ if (DIExpr.isBitPiece()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
@@ -950,11 +896,9 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
continue;
LexicalScope *Scope = nullptr;
- if (MDNode *IA = DV.getInlinedAt()) {
- DebugLoc DL = DebugLoc::getFromDILocation(IA);
- Scope = LScopes.findInlinedScope(DebugLoc::get(
- DL.getLine(), DL.getCol(), DV.getContext(), IA));
- } else
+ if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DV.getContext(), IA);
+ else
Scope = LScopes.findLexicalScope(DV.getContext());
// If variable scope is not found then skip this variable.
if (!Scope)
@@ -1026,8 +970,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END;
PrologEndLoc = DebugLoc();
+ Flags |= DWARF2_FLAG_IS_STMT;
}
- if (PrologEndLoc.isUnknown())
+ if (DL.getLine() !=
+ Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
Flags |= DWARF2_FLAG_IS_STMT;
if (!DL.isUnknown()) {
@@ -1117,8 +1063,12 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
- !MI.getDebugLoc().isUnknown())
+ !MI.getDebugLoc().isUnknown()) {
+ // Did the target forget to set the FrameSetup flag for CFI insns?
+ assert(!MI.isCFIInstruction() &&
+ "First non-frame-setup instruction is a CFI instruction.");
return MI.getDebugLoc();
+ }
return DebugLoc();
}
@@ -1172,7 +1122,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
Asm->OutStreamer.EmitLabel(FunctionBeginSym);
// Calculate history for local variables.
- calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(),
+ calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues);
// Request labels for the full history.
@@ -1187,7 +1137,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
- if (Ranges.front().first->getDebugExpression().isVariablePiece()) {
+ if (Ranges.front().first->getDebugExpression().isBitPiece()) {
// Mark all non-overlapping initial pieces.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
DIExpression Piece = I->first->getDebugExpression();
@@ -1217,12 +1167,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!PrologEndLoc.isUnknown()) {
DebugLoc FnStartDL =
PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
- recordSourceLine(
- FnStartDL.getLine(), FnStartDL.getCol(),
- FnStartDL.getScope(MF->getFunction()->getContext()),
- // We'd like to list the prologue as "not statements" but GDB behaves
- // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- DWARF2_FLAG_IS_STMT);
+
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ DWARF2_FLAG_IS_STMT);
}
}
@@ -1350,8 +1300,8 @@ void DwarfDebug::emitSectionLabels() {
if (useSplitDwarf()) {
DwarfInfoDWOSectionSym =
emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
- DwarfTypesDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
+ DwarfTypesDWOSectionSym = emitSectionSym(
+ Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
}
DwarfAbbrevSectionSym =
emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
@@ -1553,7 +1503,6 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
return dwarf::GIEK_TYPE;
case dwarf::DW_TAG_subprogram:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
- case dwarf::DW_TAG_constant:
case dwarf::DW_TAG_variable:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
case dwarf::DW_TAG_enumerator:
@@ -1656,7 +1605,7 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
const DITypeIdentifierMap &Map,
ArrayRef<DebugLocEntry::Value> Values) {
assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isVariablePiece();
+ return P.isBitPiece();
}) && "all values are expected to be pieces");
assert(std::is_sorted(Values.begin(), Values.end()) &&
"pieces are expected to be sorted");
@@ -1664,35 +1613,25 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
unsigned Offset = 0;
for (auto Piece : Values) {
DIExpression Expr = Piece.getExpression();
- unsigned PieceOffset = Expr.getPieceOffset();
- unsigned PieceSize = Expr.getPieceSize();
+ unsigned PieceOffset = Expr.getBitPieceOffset();
+ unsigned PieceSize = Expr.getBitPieceSize();
assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
if (Offset < PieceOffset) {
// The DWARF spec seriously mandates pieces with no locations for gaps.
- Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*8);
+ Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset);
Offset += PieceOffset-Offset;
}
-
Offset += PieceSize;
- const unsigned SizeOfByte = 8;
#ifndef NDEBUG
DIVariable Var = Piece.getVariable();
- assert(!Var.isIndirect() && "indirect address for piece");
unsigned VarSize = Var.getSizeInBits(Map);
- assert(PieceSize+PieceOffset <= VarSize/SizeOfByte
+ assert(PieceSize+PieceOffset <= VarSize
&& "piece is larger than or outside of variable");
- assert(PieceSize*SizeOfByte != VarSize
+ assert(PieceSize != VarSize
&& "piece covers entire variable");
#endif
- if (Piece.isLocation() && Piece.getLoc().isReg())
- Asm->EmitDwarfRegOpPiece(Streamer,
- Piece.getLoc(),
- PieceSize*SizeOfByte);
- else {
- emitDebugLocValue(Streamer, Piece);
- Asm->EmitDwarfOpPiece(Streamer, PieceSize*SizeOfByte);
- }
+ emitDebugLocValue(Streamer, Piece, PieceOffset);
}
}
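For example (piece layout assumed for illustration), with pieces covering bits [0, 16) and [32, 64) of a variable:

//   emit piece [0, 16)             Offset = 16
//   gap: EmitDwarfOpPiece(.., 16)  -> DW_OP_piece 2 (undescribed hole)
//   emit piece [32, 64)            Offset = 64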
@@ -1700,7 +1639,7 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
const DebugLocEntry::Value Value = Entry.getValues()[0];
- if (Value.isVariablePiece())
+ if (Value.isBitPiece())
// Emit all pieces that belong to the same variable and range.
return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
@@ -1709,62 +1648,33 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
}
void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value) {
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits) {
DIVariable DV = Value.getVariable();
+ DebugLocDwarfExpression DwarfExpr(*Asm, Streamer);
+
// Regular entry.
if (Value.isInt()) {
DIBasicType BTy(resolve(DV.getType()));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
- BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
- Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts");
- Streamer.EmitSLEB128(Value.getInt());
- } else {
- Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
- Streamer.EmitULEB128(Value.getInt());
- }
+ BTy.getEncoding() == dwarf::DW_ATE_signed_char))
+ DwarfExpr.AddSignedConstant(Value.getInt());
+ else
+ DwarfExpr.AddUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
DIExpression Expr = Value.getExpression();
- if (!Expr)
+ if (!Expr || (Expr.getNumElements() == 0))
// Regular entry.
- Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+ Asm->EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
- unsigned N = Expr.getNumElements();
- unsigned i = 0;
- if (N >= 2 && Expr.getElement(0) == dwarf::DW_OP_plus) {
- if (Loc.getOffset()) {
- i = 2;
- Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
- Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
- Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitSLEB128(Expr.getElement(1));
- } else {
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- MachineLocation TLoc(Loc.getReg(), Expr.getElement(1));
- Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect());
- i = 2;
- }
- } else {
- Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
- }
-
- // Emit remaining complex address elements.
- for (; i < N; ++i) {
- uint64_t Element = Expr.getElement(i);
- if (Element == dwarf::DW_OP_plus) {
- Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitULEB128(Expr.getElement(++i));
- } else if (Element == dwarf::DW_OP_deref) {
- if (!Loc.isReg())
- Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
- } else if (Element == dwarf::DW_OP_piece) {
- i += 3;
- // handled in emitDebugLocEntry.
- } else
- llvm_unreachable("unknown Opcode found in complex address");
- }
+ if (Loc.getOffset()) {
+ DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset());
+ DwarfExpr.AddExpression(Expr.begin(), Expr.end(), PieceOffsetInBits);
+ } else
+ DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(),
+ PieceOffsetInBits);
}
}
// else ... ignore constant fp. There is not any good way to
@@ -1841,13 +1751,26 @@ struct ArangeSpan {
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
- // Start the dwarf aranges section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfARangesSection());
+ // Provides a unique id per text section.
+ DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
- typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType;
+ // Prime section data.
+ SectionMap[Asm->getObjFileLowering().getTextSection()];
- SpansType Spans;
+ // Filter labels by section.
+ for (const SymbolCU &SCU : ArangeLabels) {
+ if (SCU.Sym->isInSection()) {
+ // Make a note of this symbol and its section.
+ const MCSection *Section = &SCU.Sym->getSection();
+ if (!Section->getKind().isMetadata())
+ SectionMap[Section].push_back(SCU);
+ } else {
+ // Some symbols (e.g. common/bss on mach-o) can have no section but still
+ // appear in the output. This is unfortunate, as we rely on sections to
+ // build arange spans. We can do without them, but it is awkward.
+ SectionMap[nullptr].push_back(SCU);
+ }
+ }
// Build a list of sections used.
std::vector<const MCSection *> Sections;
@@ -1860,12 +1783,45 @@ void DwarfDebug::emitDebugARanges() {
// This is only done to ensure consistent output order across different runs.
std::sort(Sections.begin(), Sections.end(), SectionSort);
- // Build a set of address spans, sorted by CU.
+ // Add terminating symbols for each section.
+ for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
+ const MCSection *Section = Sections[ID];
+ MCSymbol *Sym = nullptr;
+
+ if (Section) {
+ // We can't call MCSection::getLabelEndName, as it's only safe to do so
+ // if we know the section name up-front. For user-created sections, the
+ // resulting label may not be valid to use as a label. (section names can
+ // use a greater set of characters on some systems)
+ Sym = Asm->GetTempSymbol("debug_end", ID);
+ Asm->OutStreamer.SwitchSection(Section);
+ Asm->OutStreamer.EmitLabel(Sym);
+ }
+
+ // Insert a final terminator.
+ SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
+ }
+
+ DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
+
for (const MCSection *Section : Sections) {
SmallVector<SymbolCU, 8> &List = SectionMap[Section];
if (List.size() < 2)
continue;
+ // If we have no section (e.g. common), just write out
+ // individual spans for each symbol.
+ if (!Section) {
+ for (const SymbolCU &Cur : List) {
+ ArangeSpan Span;
+ Span.Start = Cur.Sym;
+ Span.End = nullptr;
+ if (Cur.CU)
+ Spans[Cur.CU].push_back(Span);
+ }
+ continue;
+ }
+
// Sort the symbols by offset within the section.
std::sort(List.begin(), List.end(),
[&](const SymbolCU &A, const SymbolCU &B) {
@@ -1881,35 +1837,27 @@ void DwarfDebug::emitDebugARanges() {
return IA < IB;
});
- // If we have no section (e.g. common), just write out
- // individual spans for each symbol.
- if (!Section) {
- for (const SymbolCU &Cur : List) {
+ // Build spans between each label.
+ const MCSymbol *StartSym = List[0].Sym;
+ for (size_t n = 1, e = List.size(); n < e; n++) {
+ const SymbolCU &Prev = List[n - 1];
+ const SymbolCU &Cur = List[n];
+
+ // Try and build the longest span we can within the same CU.
+ if (Cur.CU != Prev.CU) {
ArangeSpan Span;
- Span.Start = Cur.Sym;
- Span.End = nullptr;
- if (Cur.CU)
- Spans[Cur.CU].push_back(Span);
- }
- } else {
- // Build spans between each label.
- const MCSymbol *StartSym = List[0].Sym;
- for (size_t n = 1, e = List.size(); n < e; n++) {
- const SymbolCU &Prev = List[n - 1];
- const SymbolCU &Cur = List[n];
-
- // Try and build the longest span we can within the same CU.
- if (Cur.CU != Prev.CU) {
- ArangeSpan Span;
- Span.Start = StartSym;
- Span.End = Cur.Sym;
- Spans[Prev.CU].push_back(Span);
- StartSym = Cur.Sym;
- }
+ Span.Start = StartSym;
+ Span.End = Cur.Sym;
+ Spans[Prev.CU].push_back(Span);
+ StartSym = Cur.Sym;
}
}
}
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+
unsigned PtrSize = Asm->getDataLayout().getPointerSize();
// Build a list of CUs used.
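
The loop above walks the labels sorted by section offset and cuts an arange span at every CU boundary. A minimal, self-contained sketch of the same idea (Entry and Span are simplified stand-ins for SymbolCU and ArangeSpan, not code from this patch):

#include <cstddef>
#include <vector>

struct Entry { int CU; const char *Sym; };         // stand-in for SymbolCU
struct Span  { const char *Start, *End; int CU; }; // stand-in for ArangeSpan

// Cut a span whenever the owning CU changes between adjacent labels.
// Assumes List is non-empty and already sorted by offset; the final
// terminator label closes the last span.
std::vector<Span> buildSpans(const std::vector<Entry> &List) {
  std::vector<Span> Spans;
  const char *Start = List.front().Sym;
  for (size_t N = 1; N < List.size(); ++N) {
    if (List[N].CU != List[N - 1].CU) {
      Spans.push_back({Start, List[N].Sym, List[N - 1].CU});
      Start = List[N].Sym;
    }
  }
  return Spans;
}
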
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 48c2809..1c0e163 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,26 +14,25 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
-#include "DwarfFile.h"
#include "AsmPrinterHandler.h"
-#include "DIE.h"
#include "DbgValueHistoryCalculator.h"
#include "DebugLocEntry.h"
#include "DebugLocList.h"
#include "DwarfAccelTable.h"
+#include "DwarfFile.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
-
#include <memory>
namespace llvm {
@@ -68,41 +67,67 @@ public:
//===----------------------------------------------------------------------===//
/// \brief This class is used to track local variable information.
+///
+/// - Variables whose location changes over time have a DotDebugLocOffset and
+/// the other fields are not used.
+///
+/// - Variables that are described by multiple MMI table entries have multiple
+/// expressions and frame indices.
class DbgVariable {
- DIVariable Var; // Variable Descriptor.
- DIExpression Expr; // Complex address location expression.
- DIE *TheDIE; // Variable DIE.
- unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
- const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
- int FrameIndex;
+ DIVariable Var; /// Variable Descriptor.
+ SmallVector<DIExpression, 1> Expr; /// Complex address location expression.
+ DIE *TheDIE; /// Variable DIE.
+ unsigned DotDebugLocOffset; /// Offset in DotDebugLocEntries.
+ const MachineInstr *MInsn; /// DBG_VALUE instruction of the variable.
+ SmallVector<int, 1> FrameIndex; /// Frame index of the variable.
DwarfDebug *DD;
public:
/// Construct a DbgVariable from a DIVariable.
- DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD)
- : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U),
- MInsn(nullptr), FrameIndex(~0), DD(DD) {
- assert(Var.Verify() && Expr.Verify());
+ DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD, int FI = ~0)
+ : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U),
+ MInsn(nullptr), DD(DD) {
+ FrameIndex.push_back(FI);
+ assert(Var.Verify() && E.Verify());
}
/// Construct a DbgVariable from a DEBUG_VALUE.
/// AbstractVar may be NULL.
DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
- : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()),
- TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue),
- FrameIndex(~0), DD(DD) {}
+ : Var(DbgValue->getDebugVariable()),
+ Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr),
+ DotDebugLocOffset(~0U), MInsn(DbgValue), DD(DD) {
+ FrameIndex.push_back(~0);
+ }
// Accessors.
DIVariable getVariable() const { return Var; }
- DIExpression getExpression() const { return Expr; }
+ const ArrayRef<DIExpression> getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
StringRef getName() const { return Var.getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- int getFrameIndex() const { return FrameIndex; }
- void setFrameIndex(int FI) { FrameIndex = FI; }
+ const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+
+ void addMMIEntry(const DbgVariable &V) {
+ assert(DotDebugLocOffset == ~0U && !MInsn && "not an MMI entry");
+ assert(V.DotDebugLocOffset == ~0U && !V.MInsn && "not an MMI entry");
+ assert(V.Var == Var && "conflicting DIVariable");
+
+ if (V.getFrameIndex().back() != ~0) {
+ auto E = V.getExpression();
+ auto FI = V.getFrameIndex();
+ Expr.append(E.begin(), E.end());
+ FrameIndex.append(FI.begin(), FI.end());
+ }
+ assert(Expr.size() > 1
+ ? std::all_of(Expr.begin(), Expr.end(),
+ [](DIExpression &E) { return E.isBitPiece(); })
+ : (true && "conflicting locations for variable"));
+ }
+
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
@@ -129,14 +154,11 @@ public:
bool variableHasComplexAddress() const {
assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Expr.getNumElements() > 0;
+ assert(Expr.size() == 1 &&
+ "variableHasComplexAddress() invoked on multi-FI variable");
+ return Expr.back().getNumElements() > 0;
}
bool isBlockByrefVariable() const;
- unsigned getNumAddrElements() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Expr.getNumElements();
- }
- uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); }
DIType getType() const;
private:
@@ -179,10 +201,6 @@ class DwarfDebug : public AsmPrinterHandler {
// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- // Provides a unique id per text section.
- typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
- SectionMapType SectionMap;
-
LexicalScopes LScopes;
// Collection of abstract variables.
@@ -259,7 +277,8 @@ class DwarfDebug : public AsmPrinterHandler {
// them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
- SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction;
+ SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1>
+ TypeUnitsUnderConstruction;
// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
@@ -348,10 +367,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// processed.
void finalizeModuleInfo();
- /// \brief Emit labels to close any remaining sections that have been left
- /// open.
- void endSections();
-
/// \brief Emit the debug info section.
void emitDebugInfo();
@@ -565,7 +580,8 @@ public:
void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
/// \brief emit a single value for the debug loc section.
void emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value);
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits = 0);
/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
void emitLocPieces(ByteStreamer &Streamer,
const DITypeIdentifierMap &Map,
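
With DbgVariable now carrying parallel expression and frame-index lists, two MMI-table descriptions of one variable collapse into a single DbgVariable. A hedged sketch of that merge (PieceLo and PieceHi are hypothetical DW_OP_bit_piece expressions, offset 0 size 64 and offset 64 size 64):

// Given: DIVariable Var; DwarfDebug *DD; and the two hypothetical
// bit-piece expressions PieceLo and PieceHi described above.
DbgVariable A(Var, PieceLo, DD, /*FI=*/1); // bits [0, 64)   in frame slot 1
DbgVariable B(Var, PieceHi, DD, /*FI=*/2); // bits [64, 128) in frame slot 2
A.addMMIEntry(B);
// A now holds both expressions and both frame indices; the assert in
// addMMIEntry checks that every expression is a bit piece.
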
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
new file mode 100644
index 0000000..fcab067
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -0,0 +1,269 @@
+//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfExpression.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+const TargetRegisterInfo *DwarfExpression::getTRI() const {
+ return AP.TM.getSubtargetImpl()->getRegisterInfo();
+}
+
+unsigned DwarfExpression::getDwarfVersion() const {
+ return AP.getDwarfDebug()->getDwarfVersion();
+}
+
+void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ if (DwarfReg < 32) {
+ EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ } else {
+ EmitOp(dwarf::DW_OP_regx, Comment);
+ EmitUnsigned(DwarfReg);
+ }
+}
+
+void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ if (DwarfReg < 32) {
+ EmitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ } else {
+ EmitOp(dwarf::DW_OP_bregx);
+ EmitUnsigned(DwarfReg);
+ }
+ EmitSigned(Offset);
+ if (Deref)
+ EmitOp(dwarf::DW_OP_deref);
+}
+
+void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ assert(SizeInBits > 0 && "piece has size zero");
+ const unsigned SizeOfByte = 8;
+ if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
+ EmitOp(dwarf::DW_OP_bit_piece);
+ EmitUnsigned(SizeInBits);
+ EmitUnsigned(OffsetInBits);
+ } else {
+ EmitOp(dwarf::DW_OP_piece);
+ unsigned ByteSize = SizeInBits / SizeOfByte;
+ EmitUnsigned(ByteSize);
+ }
+}
+
+void DwarfExpression::AddShr(unsigned ShiftBy) {
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(ShiftBy);
+ EmitOp(dwarf::DW_OP_shr);
+}
+
+bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
+ int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false);
+ if (DwarfReg < 0)
+ return false;
+
+ if (isFrameRegister(MachineReg)) {
+ // If variable offset is based in frame register then use fbreg.
+ EmitOp(dwarf::DW_OP_fbreg);
+ EmitSigned(Offset);
+ } else {
+ AddRegIndirect(DwarfReg, Offset);
+ }
+ return true;
+}
+
+bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
+ unsigned PieceSizeInBits,
+ unsigned PieceOffsetInBits) {
+ const TargetRegisterInfo *TRI = getTRI();
+ if (!TRI->isPhysicalRegister(MachineReg))
+ return false;
+
+ int Reg = TRI->getDwarfRegNum(MachineReg, false);
+
+ // If this is a valid register number, emit it.
+ if (Reg >= 0) {
+ AddReg(Reg);
+ if (PieceSizeInBits)
+ AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+ return true;
+ }
+
+ // Walk up the super-register chain until we find a valid number.
+ // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
+ for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ if (Reg >= 0) {
+ unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg);
+ unsigned Size = TRI->getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
+ AddReg(Reg, "super-register");
+ if (PieceOffsetInBits == RegOffset) {
+ AddOpPiece(Size, RegOffset);
+ } else {
+ // If this is part of a variable in a sub-register at a
+ // non-zero offset, we need to manually shift the value into
+ // place, since the DW_OP_piece describes the part of the
+ // variable, not the position of the subregister.
+ if (RegOffset)
+ AddShr(RegOffset);
+ AddOpPiece(Size, PieceOffsetInBits);
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, attempt to find a covering set of sub-register numbers.
+ // For example, Q0 on ARM is a composition of D0+D1.
+ //
+ // Keep track of the current position so we can emit the more
+ // efficient DW_OP_piece.
+ unsigned CurPos = PieceOffsetInBits;
+ // The size of the register in bits, assuming 8 bits per byte.
+ unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ // Keep track of the bits in the register we already emitted, so we
+ // can avoid emitting redundant aliasing subregs.
+ SmallBitVector Coverage(RegSize, false);
+ for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
+ unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
+ unsigned Size = TRI->getSubRegIdxSize(Idx);
+ unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+ Reg = TRI->getDwarfRegNum(*SR, false);
+
+ // Intersection between the bits we already emitted and the bits
+ // covered by this subregister.
+ SmallBitVector Intersection(RegSize, false);
+ Intersection.set(Offset, Offset + Size);
+ Intersection ^= Coverage;
+
+ // If this sub-register has a DWARF number and we haven't covered
+ // its range, emit a DWARF piece for it.
+ if (Reg >= 0 && Intersection.any()) {
+ AddReg(Reg, "sub-register");
+ AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+ CurPos = Offset + Size;
+
+ // Mark it as emitted.
+ Coverage.set(Offset, Offset + Size);
+ }
+ }
+
+ return CurPos > PieceOffsetInBits;
+}
+
+void DwarfExpression::AddSignedConstant(int Value) {
+ EmitOp(dwarf::DW_OP_consts);
+ EmitSigned(Value);
+ // The proper way to describe a constant value is
+ // DW_OP_constu <const>, DW_OP_stack_value.
+ // Unfortunately, DW_OP_stack_value was not available until DWARF-4,
+ // so we will continue to generate DW_OP_constu <const> for DWARF-2
+ // and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
+ // actually describes a value at a constant address, not a constant value.
+ // However, in the past there was no better way to describe a constant
+ // value, so the producers and consumers started to rely on heuristics
+ // to disambiguate the value vs. location status of the expression.
+ // See PR21176 for more details.
+ if (getDwarfVersion() >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
+}
+
+void DwarfExpression::AddUnsignedConstant(unsigned Value) {
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(Value);
+ // cf. comment in DwarfExpression::AddSignedConstant().
+ if (getDwarfVersion() >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
+}
+
+static unsigned getOffsetOrZero(unsigned OffsetInBits,
+ unsigned PieceOffsetInBits) {
+ if (OffsetInBits == PieceOffsetInBits)
+ return 0;
+ assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
+ return OffsetInBits;
+}
+
+bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
+ unsigned MachineReg,
+ unsigned PieceOffsetInBits) {
+ auto I = Expr.begin();
+ // Pattern-match combinations for which more efficient representations exist
+ // first.
+ if (I == Expr.end())
+ return AddMachineRegPiece(MachineReg);
+
+ bool ValidReg = false;
+ switch (*I) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(1);
+ unsigned SizeInBits = I->getArg(2);
+ // Piece always comes at the end of the expression.
+ return AddMachineRegPiece(MachineReg, SizeInBits,
+ getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ }
+ case dwarf::DW_OP_plus:
+ // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+ if (I->getNext() == dwarf::DW_OP_deref) {
+ unsigned Offset = I->getArg(1);
+ ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+ std::advance(I, 2);
+ break;
+ } else
+ ValidReg = AddMachineRegPiece(MachineReg);
+ break;
+ case dwarf::DW_OP_deref:
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(MachineReg);
+ ++I;
+ break;
+ default:
+ llvm_unreachable("unsupported operand");
+ }
+
+ if (!ValidReg)
+ return false;
+
+ // Emit remaining elements of the expression.
+ AddExpression(I, Expr.end(), PieceOffsetInBits);
+ return true;
+}
+
+void DwarfExpression::AddExpression(DIExpression::iterator I,
+ DIExpression::iterator E,
+ unsigned PieceOffsetInBits) {
+ for (; I != E; ++I) {
+ switch (*I) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(1);
+ unsigned SizeInBits = I->getArg(2);
+ AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ break;
+ }
+ case dwarf::DW_OP_plus:
+ EmitOp(dwarf::DW_OP_plus_uconst);
+ EmitUnsigned(I->getArg(1));
+ break;
+ case dwarf::DW_OP_deref:
+ EmitOp(dwarf::DW_OP_deref);
+ break;
+ default:
+ llvm_unreachable("unhandled opcode found in DIExpression");
+ }
+ }
+}
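
Since every operation funnels through the four virtual hooks, the expression builder can be exercised outside of real emission. A sketch of a hypothetical third subclass that renders operations as text (illustrative only; this patch defines exactly the two subclasses in the header that follows):

#include "DwarfExpression.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

class PrintingDwarfExpression : public DwarfExpression {
  raw_ostream &OS;
public:
  PrintingDwarfExpression(const AsmPrinter &AP, raw_ostream &OS)
      : DwarfExpression(AP), OS(OS) {}
  void EmitOp(uint8_t Op, const char *Comment = nullptr) override {
    OS << ' ' << dwarf::OperationEncodingString(Op); // e.g. "DW_OP_breg5"
  }
  void EmitSigned(int Value) override { OS << ' ' << Value; }
  void EmitUnsigned(unsigned Value) override { OS << ' ' << Value; }
  bool isFrameRegister(unsigned MachineReg) override { return false; }
};
// e.g. AddRegIndirect(5, 16) would print: DW_OP_breg5 16
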
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
new file mode 100644
index 0000000..b90b7b6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -0,0 +1,133 @@
+//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Expression ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing DWARF expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class ByteStreamer;
+class TargetRegisterInfo;
+class DwarfUnit;
+class DIELoc;
+
+/// Base class containing the logic for constructing DWARF expressions
+/// independently of whether they are emitted into a DIE or into a .debug_loc
+/// entry.
+class DwarfExpression {
+protected:
+ const AsmPrinter &AP;
+ // Various convenience accessors that extract things out of AsmPrinter.
+ const TargetRegisterInfo *getTRI() const;
+ unsigned getDwarfVersion() const;
+
+public:
+ DwarfExpression(const AsmPrinter &AP) : AP(AP) {}
+ virtual ~DwarfExpression() {}
+
+ /// Output a dwarf operand and an optional assembler comment.
+ virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+ /// Emit a raw signed value.
+ virtual void EmitSigned(int Value) = 0;
+ /// Emit a raw unsigned value.
+ virtual void EmitUnsigned(unsigned Value) = 0;
+ /// Return whether the given machine register is the frame register in the
+ /// current function.
+ virtual bool isFrameRegister(unsigned MachineReg) = 0;
+
+ /// Emit a dwarf register operation.
+ void AddReg(int DwarfReg, const char *Comment = nullptr);
+ /// Emit a (double-)indirect dwarf register operation.
+ void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+
+ /// Emit a dwarf register operation for describing
+ /// - a small value occupying only part of a register or
+ /// - a register representing only part of a value.
+ void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+ /// Emit a shift-right dwarf expression.
+ void AddShr(unsigned ShiftBy);
+
+ /// Emit an indirect dwarf register operation for the given machine register.
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0);
+
+ /// \brief Emit a partial DWARF register operation.
+ /// \param MachineReg the register
+ /// \param PieceSizeInBits size and
+ /// \param PieceOffsetInBits offset of the piece in bits, if this is one
+ /// piece of an aggregate value.
+ ///
+ /// If size and offset are zero, an operation for the entire
+ /// register is emitted. Some targets do not provide a DWARF
+ /// register number for every register. If this is the case, this
+ /// function will attempt to emit a DWARF register by emitting a
+ /// piece of a super-register or by piecing together multiple
+ /// subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
+ unsigned PieceOffsetInBits = 0);
+
+ /// Emit a signed constant.
+ void AddSignedConstant(int Value);
+ /// Emit an unsigned constant.
+ void AddUnsignedConstant(unsigned Value);
+
+ /// Emit an entire DIExpression on top of a machine register location.
+ /// \param PieceOffsetInBits If this is one piece out of a fragmented
+ /// location, this is the offset of the piece inside the entire variable.
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg,
+ unsigned PieceOffsetInBits = 0);
+ /// Emit the operations remaining in the DIExpression, starting at iterator I.
+ /// \param PieceOffsetInBits If this is one piece out of a fragmented
+ /// location, this is the offset of the piece inside the entire variable.
+ void AddExpression(DIExpression::iterator I, DIExpression::iterator E,
+ unsigned PieceOffsetInBits = 0);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression : public DwarfExpression {
+ ByteStreamer &BS;
+
+public:
+ DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
+ : DwarfExpression(AP), BS(BS) {}
+
+ void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void EmitSigned(int Value) override;
+ void EmitUnsigned(unsigned Value) override;
+ bool isFrameRegister(unsigned MachineReg) override;
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
+class DIEDwarfExpression : public DwarfExpression {
+ DwarfUnit &DU;
+ DIELoc &DIE;
+
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE)
+ : DwarfExpression(AP), DU(DU), DIE(DIE) {}
+
+ void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void EmitSigned(int Value) override;
+ void EmitUnsigned(unsigned Value) override;
+ bool isFrameRegister(unsigned MachineReg) override;
+};
+}
+
+#endif
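
The super- and sub-register walks in AddMachineRegPiece are easiest to follow on concrete registers. A usage sketch, assuming an AsmPrinter AP and a ByteStreamer BS are in scope (the target register enums are shown for illustration only):

DebugLocDwarfExpression Expr(AP, BS);

// x86-64 %ax: a 16-bit sub-register of %rax at bit offset 0. The
// super-register walk finds %rax and emits:
//   DW_OP_reg0 (rax), DW_OP_piece 2
Expr.AddMachineRegPiece(X86::AX);

// ARM %q0: no DWARF number of its own, but composed of %d0 and %d1.
// The sub-register loop emits:
//   DW_OP_regx d0, DW_OP_piece 8, DW_OP_regx d1, DW_OP_piece 8
Expr.AddMachineRegPiece(ARM::Q0);
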
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 50180ea..3988f0d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -8,13 +8,12 @@
//===----------------------------------------------------------------------===//
#include "DwarfFile.h"
-
#include "DwarfDebug.h"
#include "DwarfUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
@@ -147,7 +146,7 @@ void DwarfFile::emitStrings(const MCSection *StrSection,
StrPool.emit(*Asm, StrSection, OffsetSection);
}
-void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
DIVariable DV = Var->getVariable();
// Variables with positive arg numbers are parameters.
@@ -169,18 +168,17 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// A later indexed parameter has been found, insert immediately before it.
if (CurNum > ArgNum)
break;
- // FIXME: There are still some cases where two inlined functions are
- // conflated together (two calls to the same function at the same
- // location (eg: via a macro, or without column info, etc)) and then
- // their arguments are conflated as well.
- assert((LS->getParent() || CurNum != ArgNum) &&
- "Duplicate argument for top level (non-inlined) function");
+ if (CurNum == ArgNum) {
+ (*I)->addMMIEntry(*Var);
+ return false;
+ }
++I;
}
Vars.insert(I, Var);
- return;
+ return true;
}
Vars.push_back(Var);
+ return true;
}
}
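
The new bool return from addScopeVariable lets a caller tie ownership to insertion. A hypothetical caller sketch (the names DF, Scope, and ConcreteVariables are placeholders):

auto Var = make_unique<DbgVariable>(DV, Expr, DD, FrameIdx);
if (DF.addScopeVariable(Scope, Var.get()))
  ConcreteVariables.push_back(std::move(Var)); // inserted: keep it alive
// Otherwise the variable was folded into an existing entry via
// addMMIEntry, and the temporary owner is free to release it.
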
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 9d64bfc..35bf33a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -10,17 +10,16 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#include "AddressPool.h"
+#include "DwarfStringPool.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
-#include "AddressPool.h"
-#include "DwarfStringPool.h"
-
-#include <vector>
-#include <string>
#include <memory>
+#include <string>
+#include <vector>
namespace llvm {
class AsmPrinter;
@@ -96,7 +95,8 @@ public:
/// \brief Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; }
- void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ /// \returns false if the variable was merged with a previous one.
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
return ScopeVariables;
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index ab32c1b..63e3412 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -13,7 +13,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Allocator.h"
-
#include <utility>
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 919d9d2..b0c7d48 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
-
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
+#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -43,6 +43,20 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
+void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
+ DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+}
+void DIEDwarfExpression::EmitSigned(int Value) {
+ DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+}
+void DIEDwarfExpression::EmitUnsigned(unsigned Value) {
+ DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+}
+bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
+ return MachineReg == getTRI()->getFrameRegister(*AP.MF);
+}
+
+
/// Unit - Unit constructor.
DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
@@ -116,6 +130,30 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
if (dwarf::DWARF_VERSION >= 4)
return 1;
break;
+
+ // The languages below have valid values only if the DWARF version >= 5.
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_Rust:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_Swift:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Modula3:
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 1;
+ break;
}
return -1;
@@ -399,85 +437,18 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
}
/// addRegisterOp - Add register operand.
-// FIXME: Ideally, this would share the implementation with
-// AsmPrinter::EmitDwarfRegOpPiece.
-void DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
unsigned SizeInBits, unsigned OffsetInBits) {
- const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
- int DWReg = RI->getDwarfRegNum(Reg, false);
- bool isSubRegister = DWReg < 0;
-
- unsigned Idx = 0;
-
- // Go up the super-register chain until we hit a valid dwarf register number.
- for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) {
- DWReg = RI->getDwarfRegNum(*SR, false);
- if (DWReg >= 0)
- Idx = RI->getSubRegIndex(*SR, Reg);
- }
-
- if (DWReg < 0) {
- DEBUG(dbgs() << "Invalid Dwarf register number.\n");
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop);
- return;
- }
-
- // Emit register.
- if (DWReg < 32)
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
- else {
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
- }
-
- // Emit mask.
- bool isPiece = SizeInBits > 0;
- if (isSubRegister || isPiece) {
- const unsigned SizeOfByte = 8;
- unsigned RegSizeInBits = RI->getSubRegIdxSize(Idx);
- unsigned RegOffsetInBits = RI->getSubRegIdxOffset(Idx);
- unsigned PieceSizeInBits = std::max(SizeInBits, RegSizeInBits);
- unsigned PieceOffsetInBits = OffsetInBits ? OffsetInBits : RegOffsetInBits;
- assert(RegSizeInBits >= SizeInBits && "register smaller than value");
-
- if (RegOffsetInBits != PieceOffsetInBits) {
- // Manually shift the value into place, since the DW_OP_piece
- // describes the part of the variable, not the position of the
- // subregister.
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(TheDie, dwarf::DW_FORM_data1, RegOffsetInBits);
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_shr);
- }
-
- if (PieceOffsetInBits > 0 || PieceSizeInBits % SizeOfByte) {
- assert(PieceSizeInBits > 0 && "piece has zero size");
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits);
- addUInt(TheDie, dwarf::DW_FORM_data1, PieceOffsetInBits);
- } else {
- assert(PieceSizeInBits > 0 && "piece has zero size");
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits/SizeOfByte);
- }
- }
+ DIEDwarfExpression Expr(*Asm, *this, TheDie);
+ Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits);
+ return true;
}
/// addRegisterOffset - Add register offset.
-void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
+bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
- const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
- unsigned DWReg = RI->getDwarfRegNum(Reg, false);
- const TargetRegisterInfo *TRI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
- if (Reg == TRI->getFrameRegister(*Asm->MF))
- // If variable offset is based in frame register then use fbreg.
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
- else if (DWReg < 32)
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
- else {
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
- }
- addSInt(TheDie, dwarf::DW_FORM_sdata, Offset);
+ DIEDwarfExpression Expr(*Asm, *this, TheDie);
+ return Expr.AddMachineRegIndirect(Reg, Offset);
}
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -581,10 +552,14 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ bool validReg;
if (Location.isReg())
- addRegisterOpPiece(*Loc, Location.getReg());
+ validReg = addRegisterOpPiece(*Loc, Location.getReg());
else
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ if (!validReg)
+ return;
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -622,13 +597,19 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
dwarf::Tag T = (dwarf::Tag)Ty.getTag();
// Encode pointer constants as unsigned bytes. This is used at least for
// null pointer constant emission.
+ // (Pieces of) aggregate types that get hacked apart by SROA may also be
+ // represented by a constant. Encode them as unsigned bytes.
// FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
// here, but accept them for now due to a bug in SROA producing bogus
// dbg.values.
- if (T == dwarf::DW_TAG_pointer_type ||
+ if (T == dwarf::DW_TAG_array_type ||
+ T == dwarf::DW_TAG_class_type ||
+ T == dwarf::DW_TAG_pointer_type ||
T == dwarf::DW_TAG_ptr_to_member_type ||
T == dwarf::DW_TAG_reference_type ||
- T == dwarf::DW_TAG_rvalue_reference_type)
+ T == dwarf::DW_TAG_rvalue_reference_type ||
+ T == dwarf::DW_TAG_structure_type ||
+ T == dwarf::DW_TAG_union_type)
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
@@ -649,11 +630,15 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
Encoding == dwarf::DW_ATE_unsigned_char ||
Encoding == dwarf::DW_ATE_signed ||
Encoding == dwarf::DW_ATE_signed_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) &&
+ Encoding == dwarf::DW_ATE_float ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ (Ty.getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty.getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
return (Encoding == dwarf::DW_ATE_unsigned ||
Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean);
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty.getTag() == dwarf::DW_TAG_unspecified_type);
}
/// If this type is derived from a base type then return base type size.
@@ -667,10 +652,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
- // If this type is not derived from any type or the type is a declaration then
- // take conservative approach.
- if (!BaseType.isValid() || BaseType.isForwardDecl())
- return Ty.getSizeInBits();
+ assert(BaseType.isValid() && "Unexpected invalid base type");
// If this is a derived type, go ahead and get the base type, unless it's a
// reference then it's just the size of the field. Pointer types have no need
@@ -977,7 +959,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
addString(Buffer, dwarf::DW_AT_name, Name);
// Add size if non-zero (derived types might be zero-sized.)
- if (Size && Tag != dwarf::DW_TAG_pointer_type)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_ptr_to_member_type)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
@@ -1110,6 +1093,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
+ // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
+ // inside C++ composite types to point to the base class with the vtable.
DICompositeType ContainingType(resolve(CTy.getContainingType()));
if (ContainingType)
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
@@ -1187,10 +1172,10 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
addType(ParamDIE, resolve(VP.getType()));
if (!VP.getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
- if (Value *Val = VP.getValue()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
+ if (Metadata *Val = VP.getValue()) {
+ if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
addConstantValue(ParamDIE, CI, resolve(VP.getType()));
- else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
+ else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
@@ -1359,7 +1344,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
if (SP.isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
- if (unsigned isa = Asm->getISAEncoding()) {
+ if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) {
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
}
@@ -1511,7 +1496,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
uint64_t FieldSize = getBaseTypeSize(DD, DT);
uint64_t OffsetInBytes;
- if (Size != FieldSize) {
+ if (FieldSize && Size != FieldSize) {
// Handle bitfield, assume bytes are 8 bits.
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
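
The added FieldSize guard avoids emitting a bogus bitfield when the base-type size comes back as zero. The decision itself, isolated into a standalone sketch (assuming 8-bit bytes; the names are hypothetical):

#include <cstdint>

struct MemberSizes { uint64_t ByteSize, BitSize; bool IsBitfield; };

// Mirrors the branch in constructMemberDIE: only a nonzero field size
// that differs from the member size marks a bitfield.
MemberSizes computeMemberSizes(uint64_t SizeInBits, uint64_t FieldSizeInBits) {
  if (FieldSizeInBits && SizeInBits != FieldSizeInBits)
    return {FieldSizeInBits / 8, SizeInBits, true}; // byte_size + bit_size
  return {SizeInBits / 8, 0, false};                // plain member
}
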
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index f40c937..7a5e47d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -14,17 +14,17 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
-#include "DIE.h"
#include "DwarfDebug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCDwarf.h"
namespace llvm {
@@ -138,6 +138,7 @@ public:
}
// Accessors.
+ AsmPrinter* getAsmPrinter() const { return Asm; }
unsigned getUniqueID() const { return UniqueID; }
uint16_t getLanguage() const { return CUNode.getLanguage(); }
DICompileUnit getCUNode() const { return CUNode; }
@@ -253,12 +254,16 @@ public:
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
- /// addRegisterOp - Add register operand.
- void addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ /// \brief Add register operand.
+ /// \returns false if the register does not exist, e.g., because it was never
+ /// materialized.
+ bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
- /// addRegisterOffset - Add register offset.
- void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
+ /// \brief Add register offset.
+ /// \returns false if the register does not exist, e.g., because it was never
+ /// materialized.
+ bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
// FIXME: Should be reformulated in terms of addComplexAddress.
/// addBlockByrefAddress - Start with the address based on the location
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 2bbffb3..4841814 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
int TypeID = TypeIds[J];
assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
- int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ int ValueForTypeID =
+ isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
@@ -195,9 +196,22 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
/// table. Entries must be ordered by try-range address.
void EHStreamer::
computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const RangeMapType &PadMap,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced so we can put them in the LSDA.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
// The end label of the previous invoke or nounwind try-range.
MCSymbol *LastLabel = nullptr;
@@ -208,6 +222,8 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// Whether the last CallSite entry was for an invoke.
bool PreviousIsInvoke = false;
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
// Visit all instructions in order of address.
for (const auto &MBB : *Asm->MF) {
for (const auto &MI : MBB) {
@@ -237,7 +253,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) {
CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -254,14 +270,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
CallSiteEntry Site = {
BeginLabel,
LastLabel,
- LandingPad->LandingPadLabel,
+ LandingPad,
FirstActions[P.PadIndex]
};
// Try to merge with the previous call-site. SJLJ doesn't do this
- if (PreviousIsInvoke && Asm->MAI->usesItaniumLSDAForExceptions()) {
+ if (PreviousIsInvoke && !IsSJLJ) {
CallSiteEntry &Prev = CallSites.back();
- if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
Prev.EndLabel = Site.EndLabel;
continue;
@@ -269,7 +285,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
// Otherwise, create a new call-site.
- if (Asm->MAI->usesItaniumLSDAForExceptions())
+ if (!IsSJLJ)
CallSites.push_back(Site);
else {
// SjLj EH must maintain the call sites in the order assigned
@@ -287,7 +303,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
+ if (SawPotentiallyThrowing && !IsSJLJ) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
@@ -338,23 +354,9 @@ void EHStreamer::emitExceptionTable() {
unsigned SizeActions =
computeActionsTable(LandingPads, Actions, FirstActions);
- // Invokes and nounwind calls have entries in PadMap (due to being bracketed
- // by try-range labels when lowered). Ordinary calls do not, so appropriate
- // try-ranges for them need be deduced when using DWARF exception handling.
- RangeMapType PadMap;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LandingPad = LandingPads[i];
- for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
- MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
- assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
- PadRange P = { i, j };
- PadMap[BeginLabel] = P;
- }
- }
-
// Compute the call-site table.
SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+ computeCallSiteTable(CallSites, LandingPads, FirstActions);
// Final tallies.
@@ -519,8 +521,7 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitULEB128(S.Action);
}
} else {
- // DWARF Exception handling
- assert(Asm->MAI->usesItaniumLSDAForExceptions());
+ // Itanium LSDA exception handling
// The call-site table is a list of all call sites that may throw an
// exception (including C++ 'throw' statements) in the procedure
@@ -576,15 +577,15 @@ void EHStreamer::emitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- if (!S.PadLabel) {
+ if (!S.LPad) {
if (VerboseAsm)
Asm->OutStreamer.AddComment(" has no landing pad");
Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
} else {
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" jumps to ") +
- S.PadLabel->getName());
- Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ S.LPad->LandingPadLabel->getName());
+ Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
}
// Offset of the first associated action record, relative to the start of
@@ -681,7 +682,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
unsigned TypeID = *I;
if (VerboseAsm) {
--Entry;
- if (TypeID != 0)
+ if (isFilterEHSelector(TypeID))
Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}
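
The call-site merge earlier in this file extends the previous entry whenever the landing pad and action match. A simplified standalone sketch of that rule (integer stand-ins for labels and pads; the real code additionally requires the previous entry to be an invoke and the target to be non-SjLj):

#include <vector>

struct Site { int Begin, End, LPad, Action; };

// Extend the previous call site when the new one continues the same
// landing pad and action; otherwise start a fresh entry.
void addSite(std::vector<Site> &Sites, const Site &S) {
  if (!Sites.empty() && Sites.back().LPad == S.LPad &&
      Sites.back().Action == S.Action && Sites.back().End == S.Begin) {
    Sites.back().End = S.End;
    return;
  }
  Sites.push_back(S);
}
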
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 7e9549d..9b316ff 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -23,6 +23,8 @@ class MachineModuleInfo;
class MachineInstr;
class MachineFunction;
class AsmPrinter;
+class MCSymbol;
+class MCSymbolRefExpr;
template <typename T>
class SmallVectorImpl;
@@ -60,11 +62,11 @@ protected:
/// Structure describing an entry in the call-site table.
struct CallSiteEntry {
// The 'try-range' is BeginLabel .. EndLabel.
- MCSymbol *BeginLabel; // zero indicates the start of the function.
- MCSymbol *EndLabel; // zero indicates the end of the function.
+ MCSymbol *BeginLabel; // Null indicates the start of the function.
+ MCSymbol *EndLabel; // Null indicates the end of the function.
- // The landing pad starts at PadLabel.
- MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ // LPad contains the landing pad start labels.
+ const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
unsigned Action;
};
@@ -86,7 +88,6 @@ protected:
/// form gaps in the table. Entries must be ordered by try-range address.
void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const RangeMapType &PadMap,
const SmallVectorImpl<const LandingPadInfo *> &LPs,
const SmallVectorImpl<unsigned> &FirstActions);
@@ -113,6 +114,13 @@ protected:
virtual void emitTypeInfos(unsigned TTypeEncoding);
+ // Helpers for identifying what kind of clause an EH typeid or selector
+ // corresponds to. Negative selectors are for filter clauses, the zero
+ // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+ static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+ static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
public:
EHStreamer(AsmPrinter *A);
virtual ~EHStreamer();
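
The selector convention encoded by the new helpers, mirrored as a free function (the real predicates are protected members of EHStreamer; this sketch is illustrative only):

// Negative selectors name filter clauses, zero names a cleanup, and
// positive selectors name catch clauses.
static const char *classifySelector(int Selector) {
  if (Selector < 0)
    return "filter";  // cf. isFilterEHSelector
  if (Selector == 0)
    return "cleanup"; // cf. isCleanupEHSelector
  return "catch";     // cf. isCatchEHSelector
}
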
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 5bda5a9..97a3234 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -34,35 +34,35 @@ using namespace llvm;
namespace {
- class ErlangGCPrinter : public GCMetadataPrinter {
- public:
- void beginAssembly(AsmPrinter &AP) override;
- void finishAssembly(AsmPrinter &AP) override;
- };
-
+class ErlangGCPrinter : public GCMetadataPrinter {
+public:
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
}
static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
-X("erlang", "erlang-compatible garbage collector");
-
-void llvm::linkErlangGCPrinter() { }
+ X("erlang", "erlang-compatible garbage collector");
-void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+void llvm::linkErlangGCPrinter() {}
-void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
MCStreamer &OS = AP.OutStreamer;
- unsigned IntPtrSize =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
// Put this in a custom .note section.
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
- .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
- SectionKind::getDataRel()));
+ AP.OutStreamer.SwitchSection(
+ AP.getObjFileLowering().getContext().getELFSection(".note.gc",
+ ELF::SHT_PROGBITS, 0));
// For each function...
- for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ FI != IE; ++FI) {
GCFunctionInfo &MD = **FI;
-
+ if (MD.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
/** A compact GC layout. Emit this data structure:
*
* struct {
@@ -88,7 +88,7 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
// Emit the address of the safe point.
OS.AddComment("safe point address");
MCSymbol *Label = PI->Label;
- AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
}
// Stack information never change in safe points! Only print info from the
@@ -101,8 +101,9 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
// Emit stack arity, i.e. the number of stacked arguments.
unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
- unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
- MD.getFunction().arg_size() - RegisteredArgs : 0;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs
+ ? MD.getFunction().arg_size() - RegisteredArgs
+ : 0;
OS.AddComment("stack arity");
AP.EmitInt16(StackArity);
@@ -113,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
// And for each live root...
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
LE = MD.live_end(PI);
- LI != LE; ++LI) {
+ LI != LE; ++LI) {
// Emit live root's offset within the stack frame.
OS.AddComment("stack index (offset / wordsize)");
AP.EmitInt16(LI->StackOffset / IntPtrSize);
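
The stack-arity expression reformatted above encodes a simple register-budget rule: five register arguments on 32-bit targets, six on 64-bit, with only the remainder counted as stacked. Isolated as a sketch:

// Returns the number of arguments that spill to the stack under the
// register-argument budget used by the Erlang GC printer.
unsigned stackArity(unsigned ArgCount, unsigned IntPtrSize) {
  unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
  return ArgCount > RegisteredArgs ? ArgCount - RegisteredArgs : 0;
}
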
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 6480d048..76d6a06 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -32,18 +32,17 @@ using namespace llvm;
namespace {
- class OcamlGCMetadataPrinter : public GCMetadataPrinter {
- public:
- void beginAssembly(AsmPrinter &AP) override;
- void finishAssembly(AsmPrinter &AP) override;
- };
-
+class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+public:
+ void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
}
static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
-Y("ocaml", "ocaml 3.10-compatible collector");
+ Y("ocaml", "ocaml 3.10-compatible collector");
-void llvm::linkOcamlGCPrinter() { }
+void llvm::linkOcamlGCPrinter() {}
static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
const std::string &MId = M.getModuleIdentifier();
@@ -67,12 +66,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
AP.OutStreamer.EmitLabel(Sym);
}
-void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
- EmitCamlGlobal(getModule(), AP, "code_begin");
+ EmitCamlGlobal(M, AP, "code_begin");
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
- EmitCamlGlobal(getModule(), AP, "data_begin");
+ EmitCamlGlobal(M, AP, "data_begin");
}
/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
@@ -91,47 +91,59 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
/// either condition is detected in a function which uses the GC.
///
-void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
- EmitCamlGlobal(getModule(), AP, "code_end");
+ EmitCamlGlobal(M, AP, "code_end");
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
- EmitCamlGlobal(getModule(), AP, "data_end");
+ EmitCamlGlobal(M, AP, "data_end");
// FIXME: Why does ocaml emit this??
AP.OutStreamer.EmitIntValue(0, IntPtrSize);
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
- EmitCamlGlobal(getModule(), AP, "frametable");
+ EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
- for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
GCFunctionInfo &FI = **I;
+ if (FI.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
NumDescriptors++;
}
}
- if (NumDescriptors >= 1<<16) {
+ if (NumDescriptors >= 1 << 16) {
// Very rude!
report_fatal_error(" Too much descriptor for ocaml GC");
}
AP.EmitInt16(NumDescriptors);
AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
- for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
GCFunctionInfo &FI = **I;
+ if (FI.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
uint64_t FrameSize = FI.getFrameSize();
- if (FrameSize >= 1<<16) {
+ if (FrameSize >= 1 << 16) {
// Very rude!
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
- "Frame size " + Twine(FrameSize) + ">= 65536.\n"
- "(" + Twine(uintptr_t(&FI)) + ")");
+ "Frame size " +
+ Twine(FrameSize) + ">= 65536.\n"
+ "(" +
+ Twine(uintptr_t(&FI)) + ")");
}
AP.OutStreamer.AddComment("live roots for " +
@@ -140,11 +152,12 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
size_t LiveCount = FI.live_size(J);
- if (LiveCount >= 1<<16) {
+ if (LiveCount >= 1 << 16) {
// Very rude!
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
- "Live root count "+Twine(LiveCount)+" >= 65536.");
+ "Live root count " +
+ Twine(LiveCount) + " >= 65536.");
}
AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
@@ -152,12 +165,13 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
AP.EmitInt16(LiveCount);
for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
- KE = FI.live_end(J); K != KE; ++K) {
- if (K->StackOffset >= 1<<16) {
+ KE = FI.live_end(J);
+ K != KE; ++K) {
+ if (K->StackOffset >= 1 << 16) {
// Very rude!
report_fatal_error(
- "GC root stack offset is outside of fixed stack frame and out "
- "of range for ocaml GC!");
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
}
AP.EmitInt16(K->StackOffset);
}
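
Reading the Emit* calls in finishAssembly together, the per-function record the printer writes can be summarized as below. This is inferred from the visible emission code only (parts of the loop fall between the hunks shown), not quoted from an ocaml runtime spec; the 1 << 16 guards exist precisely because every count is emitted as a 16-bit value:

    #include <cstdint>

    // Sketch of one emitted frame descriptor, inferred from the code above.
    struct FrameDescriptor {
      void *ReturnAddress;     // EmitSymbolValue(J->Label, IntPtrSize)
      uint16_t FrameSize;      // guarded by the FrameSize >= 1 << 16 check
      uint16_t NumLiveOffsets; // EmitInt16(LiveCount)
      uint16_t LiveOffsets[1]; // EmitInt16(K->StackOffset), variable length
    };
    // The table starts with EmitInt16(NumDescriptors) and is aligned to the
    // pointer size (EmitAlignment(IntPtrSize == 4 ? 2 : 3)) before the records.
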
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 0f0ad75..2b03877 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -60,7 +60,7 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const Function *Per = MF->getMMI().getPersonality();
shouldEmitPersonality = hasLandingPads &&
PerEncoding != dwarf::DW_EH_PE_omit && Per;
@@ -99,9 +99,151 @@ void Win64Exception::endFunction(const MachineFunction *) {
if (shouldEmitPersonality) {
Asm->OutStreamer.PushSection();
+
+ // Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer.EmitWinEHHandlerData();
- emitExceptionTable();
+
+ // Emit the tables appropriate to the personality function in use. If we
+ // don't recognize the personality, assume it uses an Itanium-style LSDA.
+ EHPersonality Per = MMI->getPersonalityType();
+ if (Per == EHPersonality::MSVC_Win64SEH)
+ emitCSpecificHandlerTable();
+ else
+ emitExceptionTable();
+
Asm->OutStreamer.PopSection();
}
Asm->OutStreamer.EmitWinCFIEndProc();
}
+
+const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
+ return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+/// Emit the language-specific data that __C_specific_handler expects. This
+/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
+/// up after faults with __try, __except, and __finally. The typeinfo values
+/// are not really RTTI data, but pointers to filter functions that return an
+/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
+/// blocks and other cleanups, the landing pad label is zero, and the filter
+/// function is actually a cleanup handler with the same prototype. A catch-all
+/// entry is modeled with a null filter function field and a non-zero landing
+/// pad label.
+///
+/// Possible filter function return values:
+/// EXCEPTION_EXECUTE_HANDLER (1):
+/// Jump to the landing pad label after cleanups.
+/// EXCEPTION_CONTINUE_SEARCH (0):
+/// Continue searching this table or continue unwinding.
+/// EXCEPTION_CONTINUE_EXECUTION (-1):
+/// Resume execution at the trapping PC.
+///
+/// Inferred table structure:
+/// struct Table {
+/// int NumEntries;
+/// struct Entry {
+/// imagerel32 LabelStart;
+/// imagerel32 LabelEnd;
+/// imagerel32 FilterOrFinally; // One means catch-all.
+/// imagerel32 LabelLPad; // Zero means __finally.
+/// } Entries[NumEntries];
+/// };
+void Win64Exception::emitCSpecificHandlerTable() {
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Simplifying assumptions for first implementation:
+ // - Cleanups are not implemented.
+ // - Filters are not implemented.
+
+ // The Itanium LSDA table sorts similar landing pads together to simplify the
+ // actions table, but we don't need that.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (const auto &LP : PadInfos)
+ LandingPads.push_back(&LP);
+
+ // Compute label ranges for call sites as we would for the Itanium LSDA, but
+ // use an all zero action table because we aren't using these actions.
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.resize(LandingPads.size());
+ SmallVector<CallSiteEntry, 64> CallSites;
+ computeCallSiteTable(CallSites, LandingPads, FirstActions);
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ MCSymbol *EHFuncEndSym =
+ Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+ // Emit the number of table entries.
+ unsigned NumEntries = 0;
+ for (const CallSiteEntry &CSE : CallSites) {
+ if (!CSE.LPad)
+ continue; // Ignore gaps.
+ for (int Selector : CSE.LPad->TypeIds) {
+ // Ignore C++ filter clauses in SEH.
+ // FIXME: Implement cleanup clauses.
+ if (isCatchEHSelector(Selector))
+ ++NumEntries;
+ }
+ }
+ Asm->OutStreamer.EmitIntValue(NumEntries, 4);
+
+ // Emit the four-label records for each call site entry. The table has to be
+ // sorted in layout order, and the call sites should already be sorted.
+ for (const CallSiteEntry &CSE : CallSites) {
+ // Ignore gaps. Unlike the Itanium model, unwinding through a frame without
+ // an EH table entry will propagate the exception rather than terminating
+ // the program.
+ if (!CSE.LPad)
+ continue;
+ const LandingPadInfo *LPad = CSE.LPad;
+
+ // Compute the label range. We may reuse the function begin and end labels
+ // rather than forming new ones.
+ const MCExpr *Begin =
+ createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
+ const MCExpr *End;
+ if (CSE.EndLabel) {
+ // The interval is half-open, so we have to add one to include the return
+ // address of the last invoke in the range.
+ End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel),
+ MCConstantExpr::Create(1, Asm->OutContext),
+ Asm->OutContext);
+ } else {
+ End = createImageRel32(EHFuncEndSym);
+ }
+
+ // These aren't really type info globals; they are actually pointers to
+ // filter functions ordered by selector. The zero selector is used for
+ // cleanups, so slot zero corresponds to selector 1.
+ const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos();
+
+ // Do a parallel iteration across typeids and clause labels, skipping filter
+ // clauses.
+ size_t NextClauseLabel = 0;
+ for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) {
+ // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME
+ // to change that.
+ int Selector = LPad->TypeIds[E - I - 1];
+
+ // Ignore C++ filter clauses in SEH.
+ // FIXME: Implement cleanup clauses.
+ if (!isCatchEHSelector(Selector))
+ continue;
+
+ Asm->OutStreamer.EmitValue(Begin, 4);
+ Asm->OutStreamer.EmitValue(End, 4);
+ if (isCatchEHSelector(Selector)) {
+ assert(unsigned(Selector - 1) < SelectorToFilter.size());
+ const GlobalValue *TI = SelectorToFilter[Selector - 1];
+ if (TI) // Emit the filter function pointer.
+ Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4);
+ else // Otherwise, this is a "catch i8* null", or catch all.
+ Asm->OutStreamer.EmitIntValue(1, 4);
+ }
+ MCSymbol *ClauseLabel = LPad->ClauseLabels[NextClauseLabel++];
+ Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4);
+ }
+ }
+}
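
To make the new table concrete, here is a hypothetical MSVC-style SEH fragment of the kind __C_specific_handler deals with; the names come from <windows.h> and the Microsoft __try/__except extension, not from this patch. Each __except clause becomes one table entry whose FilterOrFinally field points at the compiler-generated filter thunk:

    // Hypothetical example; requires cl or clang-cl targeting x64 Windows.
    #include <windows.h>

    static int Filter(unsigned Code) {
      // Return values match the list documented above.
      return Code == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER
                                                : EXCEPTION_CONTINUE_SEARCH;
    }

    void Demo(int *P) {
      __try {
        *P = 42; // The LabelStart..LabelEnd range covers this store.
      } __except (Filter(GetExceptionCode())) {
        // LabelLPad: control arrives here when the filter returns
        // EXCEPTION_EXECUTE_HANDLER (1).
      }
    }
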
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h
index 538e132..b2d5d1b 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.h
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.h
@@ -29,6 +29,10 @@ class Win64Exception : public EHStreamer {
/// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
+ void emitCSpecificHandlerTable();
+
+ const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 12f6bd7..4b64be0 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -31,10 +31,11 @@ using namespace llvm;
namespace {
class AtomicExpand: public FunctionPass {
const TargetMachine *TM;
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
@@ -67,9 +68,9 @@ FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
}
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
return false;
- auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -91,7 +92,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
auto FenceOrdering = Monotonic;
bool IsStore, IsLoad;
- if (TargetLowering->getInsertFencesForAtomic()) {
+ if (TLI->getInsertFencesForAtomic()) {
if (LI && isAtLeastAcquire(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(Monotonic);
@@ -107,9 +108,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
- (isAtLeastRelease(CASI->getSuccessOrdering()) ||
- isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
@@ -125,20 +126,19 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
}
- if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
+ if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
// - into a Cmpxchg/LL-SC loop otherwise
// we try them in that order.
- MadeChange |= (isIdempotentRMW(RMWI) &&
- simplifyIdempotentRMW(RMWI)) ||
- (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
- expandAtomicRMW(RMWI));
- } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+ MadeChange |=
+ (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
+ (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+ } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
@@ -149,13 +149,9 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad) {
IRBuilder<> Builder(I);
- auto LeadingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
- Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
- auto TrailingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
- Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
@@ -171,16 +167,13 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
}
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+ if (TLI->hasLoadLinkedStoreConditional())
return expandAtomicLoadToLL(LI);
else
return expandAtomicLoadToCmpXchg(LI);
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
IRBuilder<> Builder(LI);
// On some architectures, load-linked instructions are atomic for larger
@@ -231,9 +224,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
}
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+ if (TLI->hasLoadLinkedStoreConditional())
return expandAtomicRMWToLLSC(AI);
else
return expandAtomicRMWToCmpXchg(AI);
@@ -277,7 +268,6 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering MemOpOrder = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
@@ -397,7 +387,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
@@ -551,13 +540,10 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
}
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
expandAtomicLoad(ResultingLoad);
return true;
}
-
return false;
}
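
The AtomicExpand changes are mostly mechanical: the subtarget is now looked up per function (subtargets can differ between functions via target attributes), and the TargetLowering pointer is cached in a member once per runOnFunction instead of being re-derived in every helper. Reduced to its essentials, as a sketch rather than a drop-in pass:

    class ExpandPassSketch {
      const TargetMachine *TM;
      const TargetLowering *TLI = nullptr; // refreshed on every runOnFunction

      bool runOnFunction(Function &F) {
        if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
          return false;
        // The lookup takes F, so it must happen per function, not per pass.
        TLI = TM->getSubtargetImpl(F)->getTargetLowering();
        return doWork();
      }

      bool doWork() {
        // Helpers read the cached pointer instead of repeating
        // TM->getSubtargetImpl()->getTargetLowering().
        return TLI->hasLoadLinkedStoreConditional();
      }
    };
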
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index b9b1fd8..82f5c48 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,633 +15,23 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;
-static cl::opt<unsigned>
-PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
- cl::desc("Threshold for partial unrolling"), cl::Hidden);
-
#define DEBUG_TYPE "basictti"
-namespace {
-
-class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
- const TargetMachine *TM;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
- /// Estimate the cost overhead of SK_Alternate shuffle.
- unsigned getAltShuffleOverhead(Type *Ty) const;
-
- const TargetLoweringBase *getTLI() const {
- return TM->getSubtargetImpl()->getTargetLowering();
- }
-
-public:
- BasicTTI() : ImmutablePass(ID), TM(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) {
- initializeBasicTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- bool hasBranchDivergence() const override;
-
- /// \name Scalar TTI Implementations
- /// @{
-
- bool isLegalAddImmediate(int64_t imm) const override;
- bool isLegalICmpImmediate(int64_t imm) const override;
- bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const override;
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const override;
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
- bool isTypeLegal(Type *Ty) const override;
- unsigned getJumpBufAlignment() const override;
- unsigned getJumpBufSize() const override;
- bool shouldBuildLookupTables() const override;
- bool haveFastSqrt(Type *Ty) const override;
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override;
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- unsigned getCFInstrCost(unsigned Opcode) const override;
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
- unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
- ArrayRef<Type*> Tys) const override;
- unsigned getNumberOfParts(Type *Tp) const override;
- unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override;
- unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const override;
-
- /// @}
-};
-
-}
-
-INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
- "Target independent code generator's TTI", true, true, false)
-char BasicTTI::ID = 0;
-
-ImmutablePass *
-llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
- return new BasicTTI(TM);
-}
-
-bool BasicTTI::hasBranchDivergence() const { return false; }
-
-bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
- return getTLI()->isLegalAddImmediate(imm);
-}
-
-bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
- return getTLI()->isLegalICmpImmediate(imm);
-}
-
-bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const {
- TargetLoweringBase::AddrMode AM;
- AM.BaseGV = BaseGV;
- AM.BaseOffs = BaseOffset;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Scale;
- return getTLI()->isLegalAddressingMode(AM, Ty);
-}
-
-int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const {
- TargetLoweringBase::AddrMode AM;
- AM.BaseGV = BaseGV;
- AM.BaseOffs = BaseOffset;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Scale;
- return getTLI()->getScalingFactorCost(AM, Ty);
-}
-
-bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return getTLI()->isTruncateFree(Ty1, Ty2);
-}
-
-bool BasicTTI::isTypeLegal(Type *Ty) const {
- EVT T = getTLI()->getValueType(Ty);
- return getTLI()->isTypeLegal(T);
-}
-
-unsigned BasicTTI::getJumpBufAlignment() const {
- return getTLI()->getJumpBufAlignment();
-}
-
-unsigned BasicTTI::getJumpBufSize() const {
- return getTLI()->getJumpBufSize();
-}
-
-bool BasicTTI::shouldBuildLookupTables() const {
- const TargetLoweringBase *TLI = getTLI();
- return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
-}
-
-bool BasicTTI::haveFastSqrt(Type *Ty) const {
- const TargetLoweringBase *TLI = getTLI();
- EVT VT = TLI->getValueType(Ty);
- return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
-}
-
-void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- // This unrolling functionality is target independent, but to provide some
- // motivation for its intended use, for x86:
-
- // According to the Intel 64 and IA-32 Architectures Optimization Reference
- // Manual, Intel Core models and later have a loop stream detector
- // (and associated uop queue) that can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have no more than 4 (8 for Nehalem and later) branches
- // taken, and none of them may be calls.
- // - The loop can have no more than 18 (28 for Nehalem and later) uops.
-
- // According to the Software Optimization Guide for AMD Family 15h Processors,
- // models 30h-4fh (Steamroller and later) have a loop predictor and loop
- // buffer which can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have fewer than 16 branches
- // - The loop must have less than 40 uops in all executed loop branches
-
- // The number of taken branches in a loop is hard to estimate here, and
- // benchmarking has revealed that it is better not to be conservative when
- // estimating the branch count. As a result, we'll ignore the branch limits
- // until someone finds a case where it matters in practice.
-
- unsigned MaxOps;
- const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F);
- if (PartialUnrollingThreshold.getNumOccurrences() > 0)
- MaxOps = PartialUnrollingThreshold;
- else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
- MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
- else
- return;
-
- // Scan the loop: don't unroll loops with calls.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
- if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
- ImmutableCallSite CS(J);
- if (const Function *F = CS.getCalledFunction()) {
- if (!TopTTI->isLoweredToCall(F))
- continue;
- }
-
- return;
- }
- }
-
- // Enable runtime and partial unrolling up to the specified size.
- UP.Partial = UP.Runtime = true;
- UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by the vectorizers.
-//
-//===----------------------------------------------------------------------===//
-
-unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) const {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
-unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
- return 1;
-}
-
-unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
- return 32;
-}
-
-unsigned BasicTTI::getMaxInterleaveFactor() const {
- return 1;
-}
-
-unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind, OperandValueKind,
- OperandValueProperties,
- OperandValueProperties) const {
- // Check if any of the operands are vector operands.
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
-
- bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
- // Assume that floating point arithmetic operations cost twice as much as
- // integer operations.
- unsigned OpCost = (IsFloat ? 2 : 1);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- return LT.first * 2 * OpCost;
- return LT.first * 1 * OpCost;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered then assume
- // thare the code is twice as expensive.
- return LT.first * 2 * OpCost;
- }
-
- // Else, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return OpCost;
-}
-
-unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
- assert(Ty->isVectorTy() && "Can only shuffle vectors");
- unsigned Cost = 0;
- // Shuffle cost is equal to the cost of extracting element from its argument
- // plus the cost of inserting them onto the result vector.
-
- // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index
- // 0 of first vector, index 1 of second vector,index 2 of first vector and
- // finally index 3 of second vector and insert them at index <0,1,2,3> of
- // result vector.
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
- return Cost;
-}
-
-unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
- if (Kind == SK_Alternate) {
- return getAltShuffleOverhead(Tp);
- }
- return 1;
-}
-
-unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
-
- // Check for NOOP conversions.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
- }
-
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- // If the cast is marked as legal (or promote) then assume low cost.
- if (SrcLT.first == DstLT.first &&
- TLI->isOperationLegalOrPromote(ISD, DstLT.second))
- return 1;
-
- // Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
- // Just check the op cost. If the operation is legal then assume it costs 1.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return 1;
-
- // Assume that illegal scalar instruction are expensive.
- return 4;
- }
-
- // Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
-
- // If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Assume that Zext is done using AND.
- if (Opcode == Instruction::ZExt)
- return 1;
-
- // Assume that sext is done using SHL and SRA.
- if (Opcode == Instruction::SExt)
- return 2;
-
- // Just check the op cost. If the operation is legal then assume it costs
- // 1 and multiply by the type-legalization overhead.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return SrcLT.first * 1;
- }
-
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
-
- // Return the cost of multiple scalar invocation plus the cost of
- // inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
- }
-
- // We already handled vector-to-vector and scalar-to-scalar conversions. This
- // is where we handle bitcast between vectors and scalars. We need to assume
- // that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
- // Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
- (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
-
- llvm_unreachable("Unhandled cast");
- }
-
-unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
- // Branches are assumed to be predicted.
- return 0;
-}
-
-unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- // Selects on vectors are actually vector selects.
- if (ISD == ISD::SELECT) {
- assert(CondTy && "CondTy must exist");
- if (CondTy->isVectorTy())
- ISD = ISD::VSELECT;
- }
-
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
-
- if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
- !TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Otherwise, assume that the cast is scalarized.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
- if (CondTy)
- CondTy = CondTy->getScalarType();
- unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- CondTy);
-
- // Return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
- }
-
- // Unknown scalar opcode.
- return 1;
-}
-
-unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType());
-
- return LT.first;
-}
-
-unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- assert(!Src->isVoidTy() && "Invalid type");
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);
-
- // Assuming that all loads of legal types cost 1.
- unsigned Cost = LT.first;
-
- if (Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
- // This is a vector load that legalizes to a larger type than the vector
- // itself. Unless the corresponding extending load or truncating store is
- // legal, then this will scalarize.
- TargetLowering::LegalizeAction LA = TargetLowering::Expand;
- EVT MemVT = getTLI()->getValueType(Src, true);
- if (MemVT.isSimple() && MemVT != MVT::Other) {
- if (Opcode == Instruction::Store)
- LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
- else
- LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT());
- }
-
- if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
- // This is a vector load/store for some illegal type that is scalarized.
- // We must account for the cost of building or decomposing the vector.
- Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
- Opcode == Instruction::Store);
- }
- }
-
- return Cost;
-}
-
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- unsigned ISD = 0;
- switch (IID) {
- default: {
- // Assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- }
-
- return ScalarCalls + ScalarizationCost;
- }
- // Look for intrinsics that can be lowered directly or turned into a scalar
- // intrinsic call.
- case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
- case Intrinsic::sin: ISD = ISD::FSIN; break;
- case Intrinsic::cos: ISD = ISD::FCOS; break;
- case Intrinsic::exp: ISD = ISD::FEXP; break;
- case Intrinsic::exp2: ISD = ISD::FEXP2; break;
- case Intrinsic::log: ISD = ISD::FLOG; break;
- case Intrinsic::log10: ISD = ISD::FLOG10; break;
- case Intrinsic::log2: ISD = ISD::FLOG2; break;
- case Intrinsic::fabs: ISD = ISD::FABS; break;
- case Intrinsic::minnum: ISD = ISD::FMINNUM; break;
- case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break;
- case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
- case Intrinsic::floor: ISD = ISD::FFLOOR; break;
- case Intrinsic::ceil: ISD = ISD::FCEIL; break;
- case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
- case Intrinsic::nearbyint:
- ISD = ISD::FNEARBYINT; break;
- case Intrinsic::rint: ISD = ISD::FRINT; break;
- case Intrinsic::round: ISD = ISD::FROUND; break;
- case Intrinsic::pow: ISD = ISD::FPOW; break;
- case Intrinsic::fma: ISD = ISD::FMA; break;
- case Intrinsic::fmuladd: ISD = ISD::FMA; break;
- // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return 0;
- }
-
- const TargetLoweringBase *TLI = getTLI();
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- return LT.first * 2;
- return LT.first * 1;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered then assume
- // thare the code is twice as expensive.
- return LT.first * 2;
- }
-
- // If we can't lower fmuladd into an FMA estimate the cost as a floating
- // point mul followed by an add.
- if (IID == Intrinsic::fmuladd)
- return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
- TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
-
- // Else, assume that we need to scalarize this intrinsic. For math builtins
- // this will emit a costly libcall, adding call overhead and spills. Make it
- // very expensive.
- if (RetTy->isVectorTy()) {
- unsigned Num = RetTy->getVectorNumElements();
- unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
- Tys);
- return 10 * Cost * Num;
- }
-
- // This is going to be turned into a library call, make it expensive.
- return 10;
-}
-
-unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
- return LT.first;
-}
-
-unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
- return 0;
-}
+// This flag is used by the template base class for BasicTTIImpl, and here to
+// provide a definition.
+cl::opt<unsigned>
+ llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"),
+ cl::Hidden);
-unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const {
- assert(Ty->isVectorTy() && "Expect a vector type");
- unsigned NumVecElts = Ty->getVectorNumElements();
- unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned ArithCost = NumReduxLevels *
- TopTTI->getArithmeticInstrCost(Opcode, Ty);
- // Assume the pairwise shuffles add a cost.
- unsigned ShuffleCost =
- NumReduxLevels * (IsPairwise + 1) *
- TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
- return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
-}
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
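
Everything deleted above now lives in the BasicTTIImplBase class template from llvm/CodeGen/BasicTTIImpl.h. Instead of the old analysis-group chaining, where each method re-entered the stack through TopTTI, the template re-dispatches to the most derived type via CRTP. Approximately (member and base names beyond the two surviving lines above are assumptions about the header, not quotes from it):

    // Approximate CRTP shape; see llvm/CodeGen/BasicTTIImpl.h for the real one.
    template <typename T> class BasicTTIImplBase {
    protected:
      // Replaces TopTTI->...: dispatch to the most derived implementation.
      T *thisT() { return static_cast<T *>(this); }

    public:
      unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
        unsigned Cost = 0;
        for (int I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
          if (Insert)
            Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, I);
          if (Extract)
            Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, I);
        }
        return Cost;
      }
    };

    class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
      const TargetSubtargetInfo *ST;
      const TargetLowering *TLI;
    public:
      BasicTTIImpl(const TargetMachine *TM, Function &F); // ctor shown above
    };
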
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 2128da1..b8f05cd 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -601,8 +601,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
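
The BranchFolding hunk is a pure convenience-API cleanup; the two spellings below are equivalent, and the same substitution recurs in the CodeGenPrepare hunk further down:

    // Old spelling: name the function-level attribute index explicitly.
    bool OptForSizeOld = MF->getFunction()->getAttributes().hasAttribute(
        AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

    // New spelling: the Function-level shorthand.
    bool OptForSizeNew =
        MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
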
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 092346b..f21d4d2 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
ForwardControlFlowIntegrity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
+ GCRootLowering.cpp
GCStrategy.cpp
GlobalMerge.cpp
IfConversion.cpp
@@ -95,6 +96,7 @@ add_llvm_library(LLVMCodeGen
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
ShadowStackGC.cpp
+ ShadowStackGCLowering.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
SpillPlacement.cpp
@@ -104,6 +106,7 @@ add_llvm_library(LLVMCodeGen
StackSlotColoring.cpp
StackMapLivenessAnalysis.cpp
StackMaps.cpp
+ StatepointExampleGC.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp
TargetInstrInfo.cpp
@@ -115,6 +118,11 @@ add_llvm_library(LLVMCodeGen
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
+ WinEHPrepare.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP
)
add_dependencies(LLVMCodeGen intrinsics_gen)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 56ecde0..034ffb3 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -14,9 +14,11 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -178,3 +180,70 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
llvm_unreachable(nullptr);
}
}
+
+static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
+ if (VT.isVector())
+ return true; // Assume -msse-regparm might be in effect.
+ if (!VT.isInteger())
+ return false;
+ if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
+ return true;
+ return false;
+}
+
+void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
+ MVT VT, CCAssignFn Fn) {
+ unsigned SavedStackOffset = StackOffset;
+ unsigned NumLocs = Locs.size();
+
+ // Set the 'inreg' flag if it is used for this calling convention.
+ ISD::ArgFlagsTy Flags;
+ if (isValueTypeInRegForCC(CallingConv, VT))
+ Flags.setInReg();
+
+ // Allocate something of this value type repeatedly until we get assigned a
+ // location in memory.
+ bool HaveRegParm = true;
+ while (HaveRegParm) {
+ if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ << " while computing remaining regparms\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ HaveRegParm = Locs.back().isRegLoc();
+ }
+
+ // Copy all the registers from the value locations we added.
+ assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+ for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+ if (Locs[I].isRegLoc())
+ Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+ // Clear the assigned values and stack memory. We leave the registers marked
+ // as allocated so that future queries don't return the same registers, e.g.
+ // when i64 and f64 are both passed in GPRs.
+ StackOffset = SavedStackOffset;
+ Locs.resize(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+ SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+ CCAssignFn Fn) {
+ // Oftentimes calling conventions will not use register parameters for
+ // variadic functions, so we need to assume we're not variadic so that we get
+ // all the registers that might be used in a non-variadic call.
+ SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+
+ for (MVT RegVT : RegParmTypes) {
+ SmallVector<MCPhysReg, 8> RemainingRegs;
+ getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+ const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+ const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+ for (MCPhysReg PReg : RemainingRegs) {
+ unsigned VReg = MF.addLiveIn(PReg, RC);
+ Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+ }
+ }
+}
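
These two helpers exist so a target can forward every argument register that might be live at a variadic musttail call. A hypothetical use from a target's call lowering; CC_MyTarget and the register-type list are placeholders, not part of this patch:

    // Hypothetical fragment from a target's LowerFormalArguments.
    void forwardMustTailRegs(CCState &CCInfo,
                             SmallVectorImpl<ForwardedRegister> &Forwards) {
      MVT RegParmTypes[] = {MVT::i32, MVT::f64}; // placeholder reg-param types

      // Internally pretends the function is non-variadic, so every register a
      // fixed argument could occupy gets claimed and forwarded.
      CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
                                               CC_MyTarget);

      for (const ForwardedRegister &FR : Forwards) {
        // FR.VReg is a fresh live-in vreg for physreg FR.PReg with type FR.VT;
        // spill it on entry and reload it just before the musttail call.
      }
    }
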
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 307dec5..7c0068e 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
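
Dropping initializeBasicTTIPass follows from the BasicTargetTransformInfo rewrite earlier in this commit: TTI is no longer an ImmutablePass in an analysis group, so client passes ask the wrapper pass for a per-function TTI, as the CodeGenPrepare hunk below does. In sketch form:

    // Sketch of the new per-function TTI query inside a FunctionPass.
    struct SomePassSketch : FunctionPass {
      static char ID;
      SomePassSketch() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<TargetTransformInfoWrapperPass>();
      }

      bool runOnFunction(Function &F) override {
        const TargetTransformInfo &TTI =
            getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
        (void)TTI; // query costs via TTI.getArithmeticInstrCost(...), etc.
        return false;
      }
    };
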
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 8d20848..c0d7dca 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -30,20 +31,22 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -70,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -90,6 +97,16 @@ static cl::opt<bool> StressStoreExtract(
"stress-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+static cl::opt<bool> DisableExtLdPromotion(
+ "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+ "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+ "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+ "optimization in CodeGenPrepare"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
@@ -98,6 +115,7 @@ struct TypeIsSExt {
TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -143,8 +161,8 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -152,12 +170,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeInst(Instruction *I);
+ bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool OptimizeInst(Instruction *I, bool& ModifiedDT);
bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI);
- bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool MoveExtToFormExtLoad(Instruction *&I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
@@ -165,6 +183,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
+ bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInst);
+ bool splitBranchCondition(Function &F);
+ bool simplifyOffsetableRelocate(Instruction &I);
};
}
@@ -187,14 +211,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifiedDT = false;
if (TM)
- TLI = TM->getSubtargetImpl()->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -218,15 +241,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// into a single target instruction, push the mask and compare into branch
// users. Do this before OptimizeBlock -> OptimizeInst ->
// OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts)
+ if (!DisableBranchOpts) {
EverMadeChange |= sinkAndCmp(F);
+ EverMadeChange |= splitBranchCondition(F);
+ }
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = I++;
- MadeChange |= OptimizeBlock(*BB);
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
+
+ // Restart BB iteration if the dominator tree of the Function was changed
+ ModifiedDT |= ModifiedDTOnIteration;
+ if (ModifiedDTOnIteration)
+ break;
}
EverMadeChange |= MadeChange;
}
@@ -236,9 +267,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
SmallPtrSet<BasicBlock*, 8> WorkList;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
- MadeChange |= ConstantFoldTerminator(BB, true);
+ for (BasicBlock &BB : F) {
+ SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
+ MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
for (SmallVectorImpl<BasicBlock*>::iterator
@@ -272,6 +303,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= MadeChange;
}
+ if (!DisableGCOpts) {
+ SmallVector<Instruction *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (isStatepoint(I))
+ Statepoints.push_back(&I);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
+
if (ModifiedDT && DT)
DT->recalculate(F);
@@ -300,7 +341,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
+ MergeBasicBlockIntoOnlyPred(BB, DT);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -440,7 +481,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
+ MergeBasicBlockIntoOnlyPred(DestBB, DT);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -495,6 +536,144 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<User *> &AllRelocateCalls,
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
+ RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
+ for (auto &U : AllRelocateCalls) {
+ GCRelocateOperands ThisRelocate(U);
+ IntrinsicInst *I = cast<IntrinsicInst>(U);
+ auto K = std::make_pair(ThisRelocate.basePtrIndex(),
+ ThisRelocate.derivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, I));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ IntrinsicInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+ IntrinsicInst *Base = RelocateIdxMap[BaseKey];
+ if (!Base)
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+ RelocateInstMap[Base].push_back(I);
+ }
+}
+
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and applies it.
+static bool
+simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
+ const SmallVectorImpl<IntrinsicInst *> &Targets) {
+ bool MadeChange = false;
+ for (auto &ToReplace : Targets) {
+ GCRelocateOperands MasterRelocate(RelocatedBase);
+ GCRelocateOperands ThisRelocate(ToReplace);
+
+ assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ Value *Base = ThisRelocate.basePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ IRBuilder<> Builder(ToReplace);
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+ Value *Replacement =
+ Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+ Instruction *ReplacementInst = cast<Instruction>(Replacement);
+ ReplacementInst->removeFromParent();
+ ReplacementInst->insertAfter(RelocatedBase);
+ Replacement->takeName(ToReplace);
+ ToReplace->replaceAllUsesWith(Replacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<User *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (isGCRelocate(dyn_cast<Instruction>(U)))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(U);
+
+ // We need at least one base pointer relocation and one derived pointer
+ // relocation for this rewrite to apply.
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange |= simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
/// SinkCast - Sink the specified cast instruction into its user blocks
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -822,23 +1001,211 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
- void replaceCall(Value *With) override {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
+// ScalarizeMaskedLoad() translates the masked load intrinsic, like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, with the elements loaded one-by-one if
+// the appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+//cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+//else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+//cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+//else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void ScalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 Idx
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateLoad(Gep, false);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
}
- bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
- return SizeCI->isAllOnesValue();
- return false;
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
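+// Note on the epilogue above: the final "res.phi.select" PHI merges the last
+// conditional load into the running result, and the vector select over the
+// original %mask folds %passthru back into every lane whose mask bit was
+// clear, roughly:
+//   %res = select <16 x i1> %mask, <16 x i32> %res.phi.select,
+//                 <16 x i32> %passthru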
+
+// ScalarizeMaskedStore() translates the masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one-by-one if
+// the appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void ScalarizeMaskedStore(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Src = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_store, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 Idx
+ // store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateStore(OneElt, Gep);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
}
-};
-} // end anonymous namespace
+ CI->eraseFromParent();
+}
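+// Unlike the load expansion, no result vector needs merging here: a lane
+// whose mask bit is clear simply skips its store, so the expansion ends by
+// erasing the original intrinsic call.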
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -858,38 +1225,60 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return true;
}
- // Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakVH to hold onto it in case this
- // happens.
- WeakVH IterHandle(CurInstIterator);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // Lower all uses of llvm.objectsize.*
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ replaceAndRecursivelySimplify(CI, RetVal,
+ TLI ? TLI->getDataLayout() : nullptr,
+ TLInfo, ModifiedDT ? nullptr : DT);
- replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
-
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurInstIterator) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ // Scalarize unsupported vector masked load
+ if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
+ ScalarizeMaskedLoad(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_store: {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
+ ScalarizeMaskedStore(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
}
- return true;
- }
- if (II && TLI) {
- SmallVector<Value*, 2> PtrOps;
- Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
- while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
- return true;
+ if (TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
}
// From here on out we're working with named functions.
@@ -901,10 +1290,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
- // that have the default "don't know" as the objectsize. Anything else
- // should be left alone.
- CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD, TLInfo);
+ // to fortified library functions (e.g. __memcpy_chk) that have the default
+ // "don't know" as the objectsize. Anything else should be left alone.
+ FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ if (Value *V = Simplifier.optimizeCall(CI)) {
+ CI->replaceAllUsesWith(V);
+ CI->eraseFromParent();
+ return true;
+ }
+ return false;
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -1561,6 +1955,7 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetMachine &TM;
const TargetLowering &TLI;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -1584,13 +1979,15 @@ class AddressingModeMatcher {
/// always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedTruncs,
+ AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
+ const TargetMachine &TM, Type *AT, Instruction *MI,
+ ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ : AddrModeInsts(AMI), TM(TM),
+ TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
+ ->getTargetLowering()),
+ AccessTy(AT), MemoryInst(MI), AddrMode(AM),
InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
@@ -1607,13 +2004,13 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
+ const TargetMachine &TM,
const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -1718,6 +2115,23 @@ static bool MightBeFoldableInst(Instruction *I) {
}
}
+/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+ Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+}
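+// For instance (illustrative): if promotion rewrote an "add i8" into an
+// "add i32", this asks TLI.isOperationLegalOrCustom(ISD::ADD, MVT::i32);
+// an opcode with no ISD mapping (ISDOpcode == 0) is treated as legal because
+// the non-promoted instruction would have been in the same undefined state.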
+
+/// \brief Helper class to perform type promotion.
class TypePromotionHelper {
/// \brief Utility function to check whether or not a sign or zero extension
@@ -1747,46 +2161,59 @@ class TypePromotionHelper {
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ static Value *promoteOperandForTruncAndAnyExt(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts, bool IsSExt);
+ static Value *
+ promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *signExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
+ static Value *
+ signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, true);
}
/// \see promoteOperandForOther.
- static Value *zeroExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
+ static Value *
+ zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -1805,6 +2232,12 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
Type *ConsideredExtType,
const InstrToOrigTy &PromotedInsts,
bool IsSExt) {
+ // The promotion helper does not know how to deal with vector types yet.
+ // To be able to fix that, we would need to fix the places where we
+ // statically extend, e.g., constants and such.
+ if (Inst->getType()->isVectorTy())
+ return false;
+
// We can always get through zext.
if (isa<ZExtInst>(Inst))
return true;
@@ -1830,8 +2263,9 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// Check if we can use this operand in the extension.
// If the type is larger than the result type of the extension,
// we cannot.
- if (OpndVal->getType()->getIntegerBitWidth() >
- ConsideredExtType->getIntegerBitWidth())
+ if (!OpndVal->getType()->isIntegerTy() ||
+ OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredExtType->getIntegerBitWidth())
return false;
// If the operand of the truncate is not an instruction, we will not have
@@ -1896,7 +2330,9 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
@@ -1922,8 +2358,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
- if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
+ if (ExtInst && Exts)
+ Exts->push_back(ExtInst);
return ExtVal;
+ }
// At this point we have: ext ty opnd to ty.
// Reassign the uses of ExtInst to the opnd and remove ExtInst.
@@ -1934,7 +2373,9 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
@@ -1949,6 +2390,8 @@ Value *TypePromotionHelper::promoteOperandForOther(
ITrunc->removeFromParent();
// Insert it just after the definition.
ITrunc->insertAfter(ExtOpnd);
+ if (Truncs)
+ Truncs->push_back(ITrunc);
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
@@ -2004,12 +2447,17 @@ Value *TypePromotionHelper::promoteOperandForOther(
if (!ExtForOpnd) {
// If yes, create a new one.
DEBUG(dbgs() << "More operands to ext\n");
- ExtForOpnd =
- cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
- : TPT.createZExt(Ext, Opnd, Ext->getType()));
+ Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType());
+ if (!isa<Instruction>(ValForExtOpnd)) {
+ TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
+ continue;
+ }
+ ExtForOpnd = cast<Instruction>(ValForExtOpnd);
++CreatedInsts;
}
-
+ if (Exts)
+ Exts->push_back(ExtForOpnd);
TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
@@ -2047,16 +2495,7 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
- Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
- if (!PromotedInst)
- return false;
- int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
- // If the ISDOpcode is undefined, it was undefined before the promotion.
- if (!ISDOpcode)
- return true;
- // Otherwise, check if the promoted instruction is legal or not.
- return TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+ return isPromotedInstructionLegal(TLI, PromotedOperand);
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@@ -2250,7 +2689,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
- Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
+ Value *PromotedOperand =
+ TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2374,13 +2814,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// inline asm call are due to memory operands. If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ const TargetMachine &TM) {
+ const Function *F = CI->getParent()->getParent();
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -2396,10 +2840,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSetImpl<Instruction*> &ConsideredInsts,
- const TargetLowering &TLI) {
+static bool FindAllMemoryUses(
+ Instruction *I,
+ SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -2429,12 +2873,12 @@ static bool FindAllMemoryUses(Instruction *I,
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TM))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
return true;
}
@@ -2522,7 +2966,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -2547,7 +2991,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -2630,7 +3074,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
+ V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -2705,8 +3149,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
- TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+ } else if (AddrSinkUsingGEPs ||
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
+ TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
+ ->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -2929,8 +3375,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI->ParseConstraints(TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -2949,26 +3397,186 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
+/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// sign extensions.
+static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
+ assert(!Inst->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+ bool IsSExt = isa<SExtInst>(FirstUser);
+ Type *ExtTy = FirstUser->getType();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
+ return false;
+ Type *CurTy = UI->getType();
+ // Same input and output types: Same instruction after CSE.
+ if (CurTy == ExtTy)
+ continue;
+
+ // If IsSExt is true, we are in this situation:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty1 a to ty3
+ // Assuming ty2 is shorter than ty3, this could be turned into:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty2 b to ty3
+ // However, the last sext is not free.
+ if (IsSExt)
+ return false;
+
+ // This is a ZExt, maybe this is free to extend from one type to another.
+ // In that case, we would not account for a different use.
+ Type *NarrowTy;
+ Type *LargeTy;
+ if (ExtTy->getScalarType()->getIntegerBitWidth() >
+ CurTy->getScalarType()->getIntegerBitWidth()) {
+ NarrowTy = CurTy;
+ LargeTy = ExtTy;
+ } else {
+ NarrowTy = ExtTy;
+ LargeTy = CurTy;
+ }
+
+ if (!TLI.isZExtFree(NarrowTy, LargeTy))
+ return false;
+ }
+ // All uses are the same or can be derived from one another for free.
+ return true;
+}
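+// Illustrative example (assuming TLI.isZExtFree(i32, i64) holds on the
+// target):
+//   %a  = load i8* %p
+//   %u1 = zext i8 %a to i32
+//   %u2 = zext i8 %a to i64
+// Both uses are zexts, and the narrower i32 result can be widened to i64 for
+// free, so the two uses count as equivalent and this returns true.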
+
+/// \brief Try to form ExtLd by promoting \p Exts until they reach a
+/// load instruction.
+/// If an ext(load) can be formed, it is returned via \p LI for the load
+/// and \p Inst for the extension.
+/// Otherwise LI == nullptr and Inst == nullptr.
+/// When some promotion happened, \p TPT contains the proper state to
+/// revert them.
+///
+/// \return true when promoting was necessary to expose the ext(load)
+/// opportunity, false otherwise.
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
+bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+ LoadInst *&LI, Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInsts = 0) {
+ // Iterate over all the extensions to see if one form an ext(load).
+ for (auto I : Exts) {
+ // Check if we directly have ext(load).
+ if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+ Inst = I;
+ // No promotion happened here.
+ return false;
+ }
+ // Check whether or not we want to do any promotion.
+ if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ continue;
+ // Get the action to perform the promotion.
+ TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+ I, InsertedTruncsSet, *TLI, PromotedInsts);
+ // Check if we can promote.
+ if (!TPH)
+ continue;
+ // Save the current state.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 4> NewExts;
+ unsigned NewCreatedInsts = 0;
+ // Promote.
+ Value *PromotedVal =
+ TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ assert(PromotedVal &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ // Only one extension can be merged into a load. Therefore, if we have
+ // created more than one new extension, we heuristically cut this search
+ // path, because it would degrade the code quality.
+ // With exactly two extensions the transformation is neutral, because we
+ // will merge one into the load and leave the other. However, we
+ // optimistically keep going,
+ // because the new extension may be removed too.
+ unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ if (!StressExtLdPromotion &&
+ (TotalCreatedInsts > 1 ||
+ !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+ // The promotion is not profitable, rollback to the previous state.
+ TPT.rollback(LastKnownGood);
+ continue;
+ }
+ // The promotion is profitable.
+ // Check if it exposes an ext(load).
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
+ if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ // If we have created a new extension, i.e., now we have two
+ // extensions. We must make sure one of them is merged with
+ // the load, otherwise we may degrade the code quality.
+ (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+ // Promotion happened.
+ return true;
+ // If this does not help to expose an ext(load) then, rollback.
+ TPT.rollback(LastKnownGood);
+ }
+ // None of the extensions can form an ext(load).
+ LI = nullptr;
+ Inst = nullptr;
+ return false;
+}
+
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+ // Try to promote a chain of computation if doing so allows us to form
+ // an extended load.
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ Exts.push_back(I);
// Look for a load being extended.
- LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
- if (!LI) return false;
+ LoadInst *LI = nullptr;
+ Instruction *OldExt = I;
+ bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ if (!LI || !I) {
+ assert(!HasPromoted && !LI && "If we did not match any load instruction "
+ "the code must remain the same");
+ I = OldExt;
+ return false;
+ }
// If they're already in the same block, there's nothing to do.
- if (LI->getParent() == I->getParent())
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == I->getParent())
return false;
+ EVT VT = TLI->getValueType(I->getType());
+ EVT LoadVT = TLI->getValueType(LI->getType());
+
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() &&
- TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
- !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
- !TLI->isTruncateFree(I->getType(), LI->getType()))
+ if (!LI->hasOneUse() && TLI &&
+ (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType())) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Check whether the target supports casts folded into loads.
unsigned LType;
@@ -2978,11 +3586,15 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
+ TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
++NumExtsMoved;
@@ -3512,7 +4124,8 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
isa<UndefValue>(Val) ||
canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
} else
- assert(0 && "Did you modified shouldPromote and forgot to update this?");
+ llvm_unreachable("Did you modified shouldPromote and forgot to update "
+ "this?");
ToBePromoted->setOperand(U.getOperandNo(), NewVal);
}
Transition->removeFromParent();
@@ -3575,7 +4188,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
return false;
}
-bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
@@ -3654,14 +4267,14 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
- OptimizeInst(NC);
+ OptimizeInst(NC, ModifiedDT);
return true;
}
return false;
}
if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI);
+ return OptimizeCallInst(CI, ModifiedDT);
if (SelectInst *SI = dyn_cast<SelectInst>(I))
return OptimizeSelectInst(SI);
@@ -3678,14 +4291,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end())
- MadeChange |= OptimizeInst(CurInstIterator++);
-
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+ if (ModifiedDT)
+ return true;
+ }
MadeChange |= DupRetToEnableTailCallOpts(&BB);
return MadeChange;
@@ -3696,10 +4311,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
// find a node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ for (BasicBlock &BB : F) {
Instruction *PrevNonDbgInst = nullptr;
- for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
- Instruction *Insn = BI; ++BI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
+ Instruction *Insn = BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
// Leave dbg.values that refer to an alloca alone. These
// intrinsics describe the address of a variable (= the alloca)
@@ -3793,3 +4408,233 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
}
return MadeChange;
}
+
+/// \brief Retrieve the probabilities of a conditional branch. Returns true on
+/// success, or returns false if no or invalid metadata was found.
+static bool extractBranchMetadata(BranchInst *BI,
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
+ assert(BI->isConditional() &&
+ "Looking for probabilities on unconditional branch?");
+ auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3)
+ return false;
+
+ const auto *CITrue =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
+ const auto *CIFalse =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
+ if (!CITrue || !CIFalse)
+ return false;
+
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
+
+ return true;
+}
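+// The metadata shape this expects is the standard branch_weights form, e.g.:
+//   br i1 %cmp, label %taken, label %not_taken, !prof !0
+//   !0 = !{!"branch_weights", i32 64, i32 4}
+// Operand 1 carries the taken weight and operand 2 the not-taken weight; a
+// missing or malformed !prof node makes this return false.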
+
+/// \brief Scale down both weights to fit into uint32_t.
+static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
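+// Worked example: NewTrue = 2^33 and NewFalse = 2^31 give NewMax = 2^33 and
+// Scale = (2^33 / (2^32 - 1)) + 1 = 3, so the weights become 2863311530 and
+// 715827882, both of which now fit in uint32_t while keeping the ~4:1 ratio.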
+
+/// \brief Some targets prefer to split a conditional branch like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp ne i32 %b, 0
+/// %or.cond = or i1 %0, %1
+/// br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// into multiple branch instructions like:
+/// \code
+/// bb1:
+/// %0 = icmp ne i32 %a, 0
+/// br i1 %0, label %TrueBB, label %bb2
+/// bb2:
+/// %1 = icmp ne i32 %b, 0
+/// br i1 %1, label %TrueBB, label %FalseBB
+/// \endcode
+/// This usually allows instruction selection to do even further optimizations
+/// and combine the compare with the branch instruction. Currently this is
+/// applied for targets which have "cheap" jump instructions.
+///
+/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
+///
+bool CodeGenPrepare::splitBranchCondition(Function &F) {
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
+ return false;
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ // Does this BB end with the following?
+ // %cond1 = icmp|fcmp|binary instruction ...
+ // %cond2 = icmp|fcmp|binary instruction ...
+ // %cond.or = or|and i1 %cond1, cond2
+ // br i1 %cond.or, label %dest1, label %dest2
+ BinaryOperator *LogicOp;
+ BasicBlock *TBB, *FBB;
+ if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+ continue;
+
+ unsigned Opc;
+ Value *Cond1, *Cond2;
+ if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::And;
+ else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::Or;
+ else
+ continue;
+
+ if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
+ !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
+ continue;
+
+ DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+
+ // Create a new BB.
+ auto *InsertBefore = std::next(Function::iterator(BB))
+ .getNodePtrUnchecked();
+ auto TmpBB = BasicBlock::Create(BB.getContext(),
+ BB.getName() + ".cond.split",
+ BB.getParent(), InsertBefore);
+
+ // Update original basic block by using the first condition directly by the
+ // branch instruction and removing the no longer needed and/or instruction.
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ Br1->setCondition(Cond1);
+ LogicOp->eraseFromParent();
+
+ // Depending on the condition we have to either replace the true or the false
+ // successor of the original branch instruction.
+ if (Opc == Instruction::And)
+ Br1->setSuccessor(0, TmpBB);
+ else
+ Br1->setSuccessor(1, TmpBB);
+
+ // Fill in the new basic block.
+ auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+ if (auto *I = dyn_cast<Instruction>(Cond2)) {
+ I->removeFromParent();
+ I->insertBefore(Br2);
+ }
+
+ // Update PHI nodes in both successors. The original BB needs to be
+ // replaced in one successor's PHI nodes, because the branch comes now from
+ // the newly generated BB (TmpBB). In the other successor we need to add one
+ // incoming edge to the PHI nodes, because both branch instructions target
+ // now the same successor. Depending on the original branch condition
+ // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
+ // we perform the correct update for the PHI nodes.
+ // This doesn't change the successor order of the just created branch
+ // instruction (or any other instruction).
+ if (Opc == Instruction::Or)
+ std::swap(TBB, FBB);
+
+ // Replace the old BB with the new BB.
+ for (auto &I : *TBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ int i;
+ while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
+ PN->setIncomingBlock(i, TmpBB);
+ }
+
+ // Add another incoming edge from the new BB.
+ for (auto &I : *FBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ auto *Val = PN->getIncomingValueForBlock(&BB);
+ PN->addIncoming(Val, TmpBB);
+ }
+
+ // Update the branch weights (from SelectionDAGBuilder::
+ // FindMergedConditions).
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ //  TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ //  = TrueProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals TrueProb for
+ // TmpBB, but the math is more complicated.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = TrueWeight;
+ uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = TrueWeight;
+ NewFalseWeight = 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ } else {
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ //  = FalseProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+ uint64_t NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = 2 * TrueWeight;
+ NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ }
+
+ // Request DOM Tree update.
+ // Note: No point in getting fancy here, since the DT info is never
+ // available to CodeGenPrepare and the existing update code is broken
+ // anyway.
+ ModifiedDT = true;
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+ TmpBB->dump());
+ }
+ return MadeChange;
+}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 48213c1..c17a35d 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -59,6 +59,10 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (MI->isInlineAsm())
return false;
+ // Don't delete frame allocation labels.
+ if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC)
+ return false;
+
// Don't delete instructions with side effects.
bool SawStore = false;
if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI())
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 75b74d9..7b47a48 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,18 +14,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
@@ -44,10 +38,13 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
+
+ // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
+ // practice.
+ DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {}
+
DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {
- initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {}
bool runOnFunction(Function &Fn) override;
@@ -56,8 +53,6 @@ namespace {
return false;
}
- void getAnalysisUsage(AnalysisUsage &AU) const override { }
-
const char *getPassName() const override {
return "Exception handling preparation";
}
@@ -65,6 +60,8 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
+INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions",
+ false, false)
FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
return new DwarfEHPrepare(TM);
@@ -99,11 +96,11 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
RI->eraseFromParent();
if (EraseIVIs) {
- if (SelIVI->getNumUses() == 0)
+ if (SelIVI->use_empty())
SelIVI->eraseFromParent();
- if (ExcIVI->getNumUses() == 0)
+ if (ExcIVI->use_empty())
ExcIVI->eraseFromParent();
- if (SelLoad && SelLoad->getNumUses() == 0)
+ if (SelLoad && SelLoad->use_empty())
SelLoad->eraseFromParent();
}
@@ -114,9 +111,8 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
/// into calls to the appropriate _Unwind_Resume function.
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
SmallVector<ResumeInst*, 16> Resumes;
- for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ for (BasicBlock &BB : Fn) {
+ if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
}
@@ -124,9 +120,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
return false;
// Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ LLVMContext &Ctx = Fn.getContext();
if (!RewindFunction) {
- LLVMContext &Ctx = Resumes[0]->getContext();
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
@@ -134,7 +130,6 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
// Create the basic block where the _Unwind_Resume call will live.
- LLVMContext &Ctx = Fn.getContext();
unsigned ResumesSize = Resumes.size();
if (ResumesSize == 1) {
@@ -159,9 +154,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
- for (SmallVectorImpl<ResumeInst*>::iterator
- I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
- ResumeInst *RI = *I;
+ for (ResumeInst *RI : Resumes) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
@@ -181,6 +174,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ assert(TM && "DWARF EH preparation requires a target machine");
bool Changed = InsertUnwindResumeCalls(Fn);
return Changed;
}
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 995606f..8f74271 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -777,15 +777,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
<< "********** Function: " << MF.getName() << '\n');
// Only run if conversion if the target wants it.
- if (!MF.getTarget()
- .getSubtarget<TargetSubtargetInfo>()
- .enableEarlyIfConversion())
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ if (!STI.enableEarlyIfConversion())
return false;
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- SchedModel =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
index 85b0893..024946d 100644
--- a/lib/CodeGen/ErlangGC.cpp
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -27,56 +27,20 @@ using namespace llvm;
namespace {
- class ErlangGC : public GCStrategy {
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const;
- public:
- ErlangGC();
- bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override;
- };
-
+class ErlangGC : public GCStrategy {
+public:
+ ErlangGC();
+};
}
-static GCRegistry::Add<ErlangGC>
-X("erlang", "erlang-compatible garbage collector");
+static GCRegistry::Add<ErlangGC> X("erlang",
+ "erlang-compatible garbage collector");
-void llvm::linkErlangGC() { }
+void llvm::linkErlangGC() {}
ErlangGC::ErlangGC() {
InitRoots = false;
NeededSafePoints = 1 << GC::PostCall;
UsesMetadata = true;
CustomRoots = false;
- CustomSafePoints = true;
-}
-
-MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
- const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
- MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
- return Label;
-}
-
-bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
- ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
- MI != ME; ++MI)
-
- if (MI->getDesc().isCall()) {
-
- // Do not treat tail call sites as safe points.
- if (MI->getDesc().isTerminator())
- continue;
-
- /* Code copied from VisitCallPoint(...) */
- MachineBasicBlock::iterator RAI = MI; ++RAI;
- MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
- FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
- }
-
- return false;
}
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 3680498..b3a22c8 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -74,6 +75,9 @@ struct DomainValue {
// Is domain available?
bool hasDomain(unsigned domain) const {
+ assert(domain <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
return AvailableDomains & (1u << domain);
}
@@ -133,7 +137,7 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- std::vector<int> AliasMap;
+ std::vector<SmallVector<int, 1>> AliasMap;
const unsigned NumRegs;
LiveReg *LiveRegs;
typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
@@ -169,8 +173,8 @@ public:
}
private:
- // Register mapping.
- int regIndex(unsigned Reg);
+ iterator_range<SmallVectorImpl<int>::const_iterator>
+ regIndizes(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
@@ -201,11 +205,13 @@ private:
char ExeDepsFix::ID = 0;
-/// Translate TRI register number to an index into our smaller tables of
-/// interesting registers. Return -1 for boring registers.
-int ExeDepsFix::regIndex(unsigned Reg) {
+/// Translate TRI register number to a list of indices into our smaller
+/// tables of interesting registers.
+iterator_range<SmallVectorImpl<int>::const_iterator>
+ExeDepsFix::regIndizes(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
- return AliasMap[Reg];
+ const auto &Entry = AliasMap[Reg];
+ return make_range(Entry.begin(), Entry.end());
}
DomainValue *ExeDepsFix::alloc(int domain) {
@@ -338,9 +344,11 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
// All uses of B are referred to A.
B->Next = retain(A);
- for (unsigned rx = 0; rx != NumRegs; ++rx)
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ assert(LiveRegs && "no space allocated for live registers");
if (LiveRegs[rx].Value == B)
setLiveReg(rx, A);
+ }
return true;
}
@@ -370,13 +378,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
if (MBB->pred_empty()) {
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- int rx = regIndex(*i);
- if (rx < 0)
- continue;
- // Treat function live-ins as if they were defined just before the first
- // instruction. Usually, function arguments are set up immediately
- // before the call.
- LiveRegs[rx].Def = -1;
+ for (int rx : regIndizes(*i)) {
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
}
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
return;
@@ -467,26 +474,26 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
/// or undef use.
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
- int rx = regIndex(MI->getOperand(OpIdx).getReg());
- if (rx < 0)
- return false;
-
- unsigned Clearance = CurInstr - LiveRegs[rx].Def;
- DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ unsigned reg = MI->getOperand(OpIdx).getReg();
+ for (int rx : regIndizes(reg)) {
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
- if (Pref > Clearance) {
- DEBUG(dbgs() << ": Break dependency.\n");
- return true;
- }
- // The current clearance seems OK, but we may be ignoring a def from a
- // back-edge.
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
- DEBUG(dbgs() << ": OK .\n");
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ continue;
+ }
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK .\n");
+ return false;
+ }
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
return false;
}
- // A def from an unprocessed back-edge may make us break this dependency.
- DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
- return false;
+ return true;
}
// Update def-ages for registers defined by MI.
@@ -514,26 +521,24 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
break;
if (MO.isUse())
continue;
- int rx = regIndex(MO.getReg());
- if (rx < 0)
- continue;
-
- // This instruction explicitly defines rx.
- DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
- << '\t' << *MI);
-
- // Check clearance before partial register updates.
- // Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
- if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(MI, i, TRI);
-
- // How many instructions since rx was last written?
- LiveRegs[rx].Def = CurInstr;
-
- // Kill off domains redefined by generic instructions.
- if (Kill)
- kill(rx);
+ for (int rx : regIndizes(MO.getReg())) {
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(MI, i, TRI);
+
+ // How many instructions since rx was last written?
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+ }
}
++CurInstr;
}
@@ -582,19 +587,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = regIndex(mo.getReg());
- if (rx < 0) continue;
- force(rx, domain);
+ for (int rx : regIndizes(mo.getReg())) {
+ force(rx, domain);
+ }
}
// Kill all defs and force them.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = regIndex(mo.getReg());
- if (rx < 0) continue;
- kill(rx);
- force(rx, domain);
+ for (int rx : regIndizes(mo.getReg())) {
+ kill(rx);
+ force(rx, domain);
+ }
}
}
@@ -611,9 +616,10 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = regIndex(mo.getReg());
- if (rx < 0) continue;
- if (DomainValue *dv = LiveRegs[rx].Value) {
+ for (int rx : regIndizes(mo.getReg())) {
+ DomainValue *dv = LiveRegs[rx].Value;
+ if (dv == nullptr)
+ continue;
// Bitmask of domains that dv and available have in common.
unsigned common = dv->getCommonDomains(available);
// Is it possible to use this collapsed register for free?
@@ -645,6 +651,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
SmallVector<LiveReg, 4> Regs;
for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
int rx = *i;
+ assert(LiveRegs && "no space allocated for live registers");
const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
if (!LR.Value->getCommonDomains(available)) {
@@ -684,9 +691,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
continue;
// If latest didn't merge, it is useless now. Kill all registers using it.
- for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
- if (LiveRegs[*i].Value == Latest)
- kill(*i);
+ for (int i : used) {
+ assert(LiveRegs && "no space allocated for live registers");
+ if (LiveRegs[i].Value == Latest)
+ kill(i);
+ }
}
// dv is the DomainValue we are going to use for this instruction.
@@ -703,11 +712,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
ii != ee; ++ii) {
MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
- int rx = regIndex(mo.getReg());
- if (rx < 0) continue;
- if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
- kill(rx);
- setLiveReg(rx, dv);
+ for (int rx : regIndizes(mo.getReg())) {
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
}
}
}
@@ -735,13 +744,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// Initialize the AliasMap on the first use.
if (AliasMap.empty()) {
- // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
- // or -1.
- AliasMap.resize(TRI->getNumRegs(), -1);
+ // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
+ // therefore the LiveRegs array.
+ AliasMap.resize(TRI->getNumRegs());
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
AI.isValid(); ++AI)
- AliasMap[*AI] = i;
+ AliasMap[*AI].push_back(i);
}
MachineBasicBlock *Entry = MF->begin();
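Aside: regIndizes now returns an iterator_range, a thin begin/end wrapper from llvm/ADT/iterator_range.h. A self-contained sketch of the helper, independent of this patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"

// make_range packages a begin/end pair so callers can use a plain
// range-based for loop, as the new regIndizes() clients do above.
static int sum(const llvm::SmallVectorImpl<int> &Entry) {
  int S = 0;
  for (int I : llvm::make_range(Entry.begin(), Entry.end()))
    S += I;
  return S;
}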
diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
index 5e7e853..63c3699 100644
--- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp
+++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
@@ -25,9 +25,9 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index ed40982..a2c5fce 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -24,22 +24,20 @@
using namespace llvm;
namespace {
-
- class Printer : public FunctionPass {
- static char ID;
- raw_ostream &OS;
-
- public:
- explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
- const char *getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
- bool runOnFunction(Function &F) override;
- bool doFinalization(Module &M) override;
- };
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+};
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
@@ -48,7 +46,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
// -----------------------------------------------------------------------------
GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
- : F(F), S(S), FrameSize(~0LL) {}
+ : F(F), S(S), FrameSize(~0LL) {}
GCFunctionInfo::~GCFunctionInfo() {}
@@ -56,51 +54,29 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
-GCModuleInfo::GCModuleInfo()
- : ImmutablePass(ID) {
+GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
}
-GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
- const std::string &Name) {
- strategy_map_type::iterator NMI = StrategyMap.find(Name);
- if (NMI != StrategyMap.end())
- return NMI->getValue();
-
- for (GCRegistry::iterator I = GCRegistry::begin(),
- E = GCRegistry::end(); I != E; ++I) {
- if (Name == I->getName()) {
- std::unique_ptr<GCStrategy> S = I->instantiate();
- S->M = M;
- S->Name = Name;
- StrategyMap[Name] = S.get();
- StrategyList.push_back(std::move(S));
- return StrategyList.back().get();
- }
- }
-
- dbgs() << "unsupported GC: " << Name << "\n";
- llvm_unreachable(nullptr);
-}
-
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
assert(F.hasGC());
-
+
finfo_map_type::iterator I = FInfoMap.find(&F);
if (I != FInfoMap.end())
return *I->second;
-
- GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
- GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+
+ GCStrategy *S = getGCStrategy(F.getGC());
+ Functions.push_back(make_unique<GCFunctionInfo>(F, *S));
+ GCFunctionInfo *GFI = Functions.back().get();
FInfoMap[&F] = GFI;
return *GFI;
}
void GCModuleInfo::clear() {
+ Functions.clear();
FInfoMap.clear();
- StrategyMap.clear();
- StrategyList.clear();
+ GCStrategyList.clear();
}
// -----------------------------------------------------------------------------
@@ -111,7 +87,6 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
return new Printer(OS);
}
-
const char *Printer::getPassName() const {
return "Print Garbage Collector Information";
}
@@ -124,42 +99,49 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- case GC::Loop: return "loop";
- case GC::Return: return "return";
- case GC::PreCall: return "pre-call";
- case GC::PostCall: return "post-call";
+ case GC::Loop:
+ return "loop";
+ case GC::Return:
+ return "return";
+ case GC::PreCall:
+ return "pre-call";
+ case GC::PostCall:
+ return "post-call";
}
llvm_unreachable("Invalid point kind");
}
bool Printer::runOnFunction(Function &F) {
- if (F.hasGC()) return false;
-
+ if (F.hasGC())
+ return false;
+
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
-
+
OS << "GC roots for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
- RE = FD->roots_end(); RI != RE; ++RI)
+ RE = FD->roots_end();
+ RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
-
+
OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
- for (GCFunctionInfo::iterator PI = FD->begin(),
- PE = FD->end(); PI != PE; ++PI) {
-
- OS << "\t" << PI->Label->getName() << ": "
- << DescKind(PI->Kind) << ", live = {";
-
+ for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
+ ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ << ", live = {";
+
for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
- RE = FD->live_end(PI);;) {
+ RE = FD->live_end(PI);
+ ;) {
OS << " " << RI->Num;
if (++RI == RE)
break;
OS << ",";
}
-
+
OS << " }\n";
}
-
+
return false;
}
@@ -169,3 +151,23 @@ bool Printer::doFinalization(Module &M) {
GMI->clear();
return false;
}
+
+
+GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
+ // TODO: Arguably, just doing a linear search would be faster for small N
+ auto NMI = GCStrategyMap.find(Name);
+ if (NMI != GCStrategyMap.end())
+ return NMI->getValue();
+
+ for (auto& Entry : GCRegistry::entries()) {
+ if (Name == Entry.getName()) {
+ std::unique_ptr<GCStrategy> S = Entry.instantiate();
+ S->Name = Name;
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
+ }
+ }
+
+ report_fatal_error(std::string("unsupported GC: ") + Name);
+}
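The new getGCStrategy is the single lookup point: a hit in GCStrategyMap returns the cached strategy; otherwise the registry is scanned and the instance cached. A hedged usage sketch (the caller shown is hypothetical):

// Hypothetical caller inside a pass: resolve a function's collector by
// name. The first call instantiates via GCRegistry; later calls hit the
// GCStrategyMap cache.
GCModuleInfo &GMI = getAnalysis<GCModuleInfo>();
GCStrategy *S = GMI.getGCStrategy(F.getGC()); // e.g. "erlang"
(void)S;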
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index f80e9ce..bb8cfa1 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -14,14 +14,6 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
using namespace llvm;
-GCMetadataPrinter::GCMetadataPrinter() { }
+GCMetadataPrinter::GCMetadataPrinter() {}
-GCMetadataPrinter::~GCMetadataPrinter() { }
-
-void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
- // Default is no action.
-}
-
-void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- // Default is no action.
-}
+GCMetadataPrinter::~GCMetadataPrinter() {}
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
new file mode 100644
index 0000000..9d38e4c
--- /dev/null
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -0,0 +1,351 @@
+//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering for the gc.root mechanism.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+/// directed by the GCStrategy. It also performs automatic root initialization
+/// and custom intrinsic lowering.
+class LowerIntrinsics : public FunctionPass {
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+
+public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+};
+
+/// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+/// function representation to identify safe points for the garbage collector
+/// in the machine code. It inserts labels at safe points and populates a
+/// GCMetadata record for each function.
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier() || !C.customReadBarrier() ||
+ C.initializeRoots();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ return false;
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+ // The natural candidates for instructions that could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+ isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP))
+ ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst *, 16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst *SI = new StoreInst(
+ ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst *, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St =
+ new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+ if (MI->isCall()) {
+ // Do not treat tail or sibling call sites as safe points. This is
+ // legal since any arguments passed to the callee which live in the
+ // remnants of the caller's frame will be owned and updated by the
+ // callee if required.
+ if (MI->isTerminator())
+ continue;
+ VisitCallPoint(MI);
+ }
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ assert(TFI && "TargetFrameLowering not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
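Both passes in this new file are normally scheduled by the target pass config; a hedged sketch of driving the IR-level half by hand (pipeline details assumed):

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

// Sketch under assumptions: M is a Module whose functions carry a
// gc "..." attribute. The lowering rewrites gcwrite to a store and
// gcread to a load; GCMachineCodeAnalysis runs later, at MI level.
void lowerGCIntrinsics(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createGCLoweringPass());
  PM.run(M);
}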
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index b346657..554d326 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,4 +1,4 @@
-//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,417 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements target- and collector-independent garbage collection
-// infrastructure.
-//
-// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
-// Roots are identified in SelectionDAGISel.
+// This file implements the policy object GCStrategy which describes the
+// behavior of a given garbage collector.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-namespace {
-
- /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
- /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
- /// directed by the GCStrategy. It also performs automatic root initialization
- /// and custom intrinsic lowering.
- class LowerIntrinsics : public FunctionPass {
- static bool NeedsDefaultLoweringPass(const GCStrategy &C);
- static bool NeedsCustomLoweringPass(const GCStrategy &C);
- static bool CouldBecomeSafePoint(Instruction *I);
- bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
- static bool InsertRootInitializers(Function &F,
- AllocaInst **Roots, unsigned Count);
-
- public:
- static char ID;
-
- LowerIntrinsics();
- const char *getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- };
-
-
- /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
- /// function representation to identify safe points for the garbage collector
- /// in the machine code. It inserts labels at safe points and populates a
- /// GCMetadata record for each function.
- class GCMachineCodeAnalysis : public MachineFunctionPass {
- const TargetMachine *TM;
- GCFunctionInfo *FI;
- MachineModuleInfo *MMI;
- const TargetInstrInfo *TII;
-
- void FindSafePoints(MachineFunction &MF);
- void VisitCallPoint(MachineBasicBlock::iterator MI);
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const;
-
- void FindStackOffsets(MachineFunction &MF);
-
- public:
- static char ID;
-
- GCMachineCodeAnalysis();
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- };
-
-}
-
-// -----------------------------------------------------------------------------
-
-GCStrategy::GCStrategy() :
- NeededSafePoints(0),
- CustomReadBarriers(false),
- CustomWriteBarriers(false),
- CustomRoots(false),
- CustomSafePoints(false),
- InitRoots(true),
- UsesMetadata(false)
-{}
-
-bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
-
-bool GCStrategy::performCustomLowering(Function &F) {
- dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
- llvm_unreachable("must override performCustomLowering");
-}
-
-
-bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
- dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
- llvm_unreachable(nullptr);
-}
-
-
-GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
- Functions.push_back(make_unique<GCFunctionInfo>(F, *this));
- return Functions.back().get();
-}
-
-// -----------------------------------------------------------------------------
-
-INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
-INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
-
-FunctionPass *llvm::createGCLoweringPass() {
- return new LowerIntrinsics();
-}
-
-char LowerIntrinsics::ID = 0;
-
-LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(ID) {
- initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
- }
-
-const char *LowerIntrinsics::getPassName() const {
- return "Lower Garbage Collection Instructions";
-}
-
-void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<GCModuleInfo>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-}
-
-/// doInitialization - If this module uses the GC intrinsics, find them now.
-bool LowerIntrinsics::doInitialization(Module &M) {
- // FIXME: This is rather antisocial in the context of a JIT since it performs
- // work against the entire module. But this cannot be done at
- // runFunction time (initializeCustomLowering likely needs to change
- // the module).
- GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && I->hasGC())
- MI->getFunctionInfo(*I); // Instantiate the GC strategy.
-
- bool MadeChange = false;
- for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
- if (NeedsCustomLoweringPass(**I))
- if ((*I)->initializeCustomLowering(M))
- MadeChange = true;
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
- unsigned Count) {
- // Scroll past alloca instructions.
- BasicBlock::iterator IP = F.getEntryBlock().begin();
- while (isa<AllocaInst>(IP)) ++IP;
-
- // Search for initializers in the initial BB.
- SmallPtrSet<AllocaInst*,16> InitedRoots;
- for (; !CouldBecomeSafePoint(IP); ++IP)
- if (StoreInst *SI = dyn_cast<StoreInst>(IP))
- if (AllocaInst *AI =
- dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
- InitedRoots.insert(AI);
-
- // Add root initializers.
- bool MadeChange = false;
-
- for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I)) {
- StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
- cast<PointerType>((*I)->getType())->getElementType())),
- *I);
- SI->insertAfter(*I);
- MadeChange = true;
- }
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !C.customWriteBarrier()
- || !C.customReadBarrier()
- || C.initializeRoots();
-}
-
-bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
- // Custom lowering is only necessary if enabled for some action.
- return C.customWriteBarrier()
- || C.customReadBarrier()
- || C.customRoots();
-}
-
-/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
-/// instruction could introduce a safe point.
-bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
- // The natural definition of instructions which could introduce safe points
- // are:
- //
- // - call, invoke (AfterCall, BeforeCall)
- // - phis (Loops)
- // - invoke, ret, unwind (Exit)
- //
- // However, instructions as seemingly inoccuous as arithmetic can become
- // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
- // it is necessary to take a conservative approach.
-
- if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<StoreInst>(I) || isa<LoadInst>(I))
- return false;
-
- // llvm.gcroot is safe because it doesn't do anything at runtime.
- if (CallInst *CI = dyn_cast<CallInst>(I))
- if (Function *F = CI->getCalledFunction())
- if (unsigned IID = F->getIntrinsicID())
- if (IID == Intrinsic::gcroot)
- return false;
-
- return true;
-}
-
-/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
-/// Leave gcroot intrinsics; the code generator needs to see those.
-bool LowerIntrinsics::runOnFunction(Function &F) {
- // Quick exit for functions that do not use GC.
- if (!F.hasGC())
- return false;
-
- GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- GCStrategy &S = FI.getStrategy();
-
- bool MadeChange = false;
-
- if (NeedsDefaultLoweringPass(S))
- MadeChange |= PerformDefaultLowering(F, S);
-
- bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
- if (UseCustomLoweringPass)
- MadeChange |= S.performCustomLowering(F);
-
- // Custom lowering may modify the CFG, so dominators must be recomputed.
- if (UseCustomLoweringPass) {
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().recalculate(F);
- }
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
- bool LowerWr = !S.customWriteBarrier();
- bool LowerRd = !S.customReadBarrier();
- bool InitRoots = S.initializeRoots();
-
- SmallVector<AllocaInst*, 32> Roots;
-
- bool MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
- Function *F = CI->getCalledFunction();
- switch (F->getIntrinsicID()) {
- case Intrinsic::gcwrite:
- if (LowerWr) {
- // Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getArgOperand(0),
- CI->getArgOperand(2), CI);
- CI->replaceAllUsesWith(St);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcread:
- if (LowerRd) {
- // Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
- Ld->takeName(CI);
- CI->replaceAllUsesWith(Ld);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcroot:
- if (InitRoots) {
- // Initialize the GC root, but do not delete the intrinsic. The
- // backend needs the intrinsic to flag the stack slot.
- Roots.push_back(cast<AllocaInst>(
- CI->getArgOperand(0)->stripPointerCasts()));
- }
- break;
- default:
- continue;
- }
-
- MadeChange = true;
- }
- }
- }
-
- if (Roots.size())
- MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
-
- return MadeChange;
-}
-
-// -----------------------------------------------------------------------------
-
-char GCMachineCodeAnalysis::ID = 0;
-char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
-
-INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
- "Analyze Machine Code For Garbage Collection", false, false)
-
-GCMachineCodeAnalysis::GCMachineCodeAnalysis()
- : MachineFunctionPass(ID) {}
-
-void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
- AU.addRequired<GCModuleInfo>();
-}
-
-MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
- MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
- return Label;
-}
-
-void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
- // Find the return address (next instruction), too, so as to bracket the call
- // instruction.
- MachineBasicBlock::iterator RAI = CI;
- ++RAI;
-
- if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
- MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
- FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
- }
-
- if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
- MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
- FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
- }
-}
-
-void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(),
- BBE = MF.end(); BBI != BBE; ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(),
- ME = BBI->end(); MI != ME; ++MI)
- if (MI->isCall())
- VisitCallPoint(MI);
-}
-
-void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering();
- assert(TFI && "TargetRegisterInfo not available!");
-
- for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
- RI != FI->roots_end();) {
- // If the root references a dead object, no need to keep it.
- if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
- RI = FI->removeStackRoot(RI);
- } else {
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
- ++RI;
- }
- }
-}
-
-bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
- // Quick exit for functions that do not use GC.
- if (!MF.getFunction()->hasGC())
- return false;
-
- FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
- if (!FI->getStrategy().needsSafePoints())
- return false;
-
- TM = &MF.getTarget();
- MMI = &getAnalysis<MachineModuleInfo>();
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- // Find the size of the stack frame.
- FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
- // Find all safe points.
- if (FI->getStrategy().customSafePoints()) {
- FI->getStrategy().findCustomSafePoints(*FI, MF);
- } else {
- FindSafePoints(MF);
- }
-
- // Find the stack offsets for all roots.
- FindStackOffsets(MF);
-
- return false;
-}
+GCStrategy::GCStrategy()
+ : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
+ CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
+ UsesMetadata(false) {}
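The constructor also gains UseStatepoints, defaulting to false. A collector built on statepoint lowering would flip it in its own constructor; illustrative only, not part of this commit:

class StatepointLikeGC : public GCStrategy {
public:
  StatepointLikeGC() { UseStatepoints = true; } // opt into statepoints
};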
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 457d7d6..4188e5d 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -54,6 +54,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -64,7 +65,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -90,10 +90,16 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
cl::desc("Enable global merge pass on external linkage"),
cl::init(false));
-STATISTIC(NumMerged , "Number of globals merged");
+STATISTIC(NumMerged, "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
const TargetMachine *TM;
+ const DataLayout *DL;
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions), see the code that passes in the offset in the ARM backend
+ // for more information.
+ unsigned MaxOffset;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
@@ -117,8 +123,10 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ explicit GlobalMerge(const TargetMachine *TM = nullptr,
+ unsigned MaximalOffset = 0)
+ : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
+ MaxOffset(MaximalOffset) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -138,22 +146,16 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
+INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
+INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- const DataLayout *DL = TLI->getDataLayout();
-
- // FIXME: Infer the maximum possible offset depending on the actual users
- // (these max offsets are different for the users inside Thumb or ARM
- // functions)
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
// FIXME: Find better heuristics
std::stable_sort(Globals.begin(), Globals.end(),
- [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ [this](const GlobalVariable *GV1, const GlobalVariable *GV2) {
Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
@@ -282,9 +284,6 @@ bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- const DataLayout *DL = TLI->getDataLayout();
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
setMustKeepGlobalVariables(M);
@@ -357,6 +356,6 @@ bool GlobalMerge::doFinalization(Module &M) {
return false;
}
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
- return new GlobalMerge(TM);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
+ return new GlobalMerge(TM, Offset);
}
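Because MaxOffset is now a constructor argument instead of a TargetLowering query, the backend scheduling the pass supplies it; a hedged sketch (the offset value is illustrative, not a real subtarget limit):

// e.g. in a target's pass setup; 127 stands in for the reach of the
// subtarget's addressing mode.
PM.add(createGlobalMergePass(TM, /*Offset=*/127));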
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index e84d25d..7a29569 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -271,15 +271,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- TLI = MF.getSubtarget().getTargetLowering();
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TLI = ST.getTargetLowering();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
-
- const TargetSubtargetInfo &ST =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(ST.getSchedModel(), &ST, TII);
if (!TII) return false;
@@ -290,7 +288,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, *MBFI, *MBPI);
- BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 6a6e15d..f0d407f 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -508,6 +508,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
WorkList.push_back(std::make_pair(UseReg, UseVNI));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
do {
unsigned Reg;
VNInfo *VNI;
@@ -521,8 +522,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Trace through PHI-defs created by live range splitting.
if (VNI->isPHIDef()) {
- // Stop at original PHIs. We don't know the value at the predecessors.
- if (VNI->def == OrigVNI->def) {
+ // Stop at original PHIs. We don't know the value at the
+ // predecessors. Look up the VNInfo for the current definition
+ // in OrigLI, to properly determine whether or not this phi was
+ // added by splitting.
+ if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
DEBUG(dbgs() << "orig phi value\n");
SVI->second.DefByOrigPHI = true;
SVI->second.AllDefsAreReloads = false;
@@ -542,7 +546,6 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Separate all values dominated by OrigVNI into PHIs and non-PHIs.
SmallVector<VNInfo*, 8> PHIs, NonPHIs;
LiveInterval &LI = LIS.getInterval(Reg);
- LiveInterval &OrigLI = LIS.getInterval(Original);
for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
VI != VE; ++VI) {
@@ -573,8 +576,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
std::tie(SVI, Inserted) =
SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
// Add all the PHIs as dependents of NonPHI.
- for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
- SVI->second.Deps.push_back(PHIs[pi]);
+ SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
+ PHIs.end());
// This is the first time we see NonPHI, add it to the worklist.
if (Inserted)
WorkList.push_back(std::make_pair(Reg, NonPHI));
@@ -1088,7 +1091,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
bool WasCopy = MI->isCopy();
unsigned ImpReg = 0;
- bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
MI->getOpcode() == TargetOpcode::STACKMAP);
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
@@ -1138,13 +1142,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
continue;
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
- if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
- SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
- if (VNInfo *VNI = LR->getVNInfoAt(Idx))
- LR->removeValNo(VNI);
- }
- }
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ LIS.removePhysRegDefAt(Reg, Idx);
}
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
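For reference, the new LiveIntervals::removePhysRegDefAt helper is equivalent to the regunit walk deleted above:

// Effect of LIS.removePhysRegDefAt(Reg, Idx), per the removed loop:
// drop the value defined at Idx from every regunit of Reg.
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
  if (LiveRange *LR = LIS.getCachedRegUnit(*Units))
    if (VNInfo *VNI = LR->getVNInfoAt(Idx))
      LR->removeValNo(VNI);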
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
index 20f775c..75fa261 100644
--- a/lib/CodeGen/JumpInstrTables.cpp
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -13,7 +13,6 @@
#define DEBUG_TYPE "jt"
#include "llvm/CodeGen/JumpInstrTables.h"
-
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -30,7 +29,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
#include <vector>
using namespace llvm;
@@ -117,8 +115,8 @@ bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
if (!isa<GlobalAlias>(C))
C->replaceUsesOfWithOnConstant(GV, V, U);
} else {
- assert(false && "The Use of a Function symbol is neither an instruction nor"
- " a constant");
+ llvm_unreachable("The Use of a Function symbol is neither an instruction "
+ "nor a constant");
}
return true;
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 61face2..9c23368 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,23 +12,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
-
#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -78,8 +78,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
}
-void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- PM.add(createBasicTargetTransformInfoPass(this));
+TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(BasicTTIImpl(this, F));
+ });
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
@@ -90,7 +92,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
AnalysisID StopAfter) {
// Add internal analysis passes from the target machine.
- TM->addAnalysisPasses(PM);
+ PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
// Targets may override createPassConfig to provide a target-specific
// subclass.
@@ -114,7 +116,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(
*TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
- &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering());
+ TM->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -222,13 +224,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
}
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
if (!Printer)
return true;
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.release();
-
PM.add(Printer);
return false;
@@ -262,20 +262,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
if (!MCE || !MAB)
return true;
- std::unique_ptr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget()
- .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB,
- Out, MCE, STI,
- Options.MCOptions.MCRelaxAll));
+ std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
+ getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
+ Options.MCOptions.MCRelaxAll));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
if (!Printer)
return true;
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.release();
-
PM.add(Printer);
return false; // success!
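The addAnalysisPasses hook is thus replaced by a factory: getTargetIRAnalysis captures the TargetMachine and builds a fresh BasicTTIImpl-backed TargetTransformInfo per function. Condensed, the consumer side (as used in the hunk above):

// The wrapper pass re-runs the captured lambda for each Function.
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));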
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index b621e3b..9eaf7da 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -104,14 +104,6 @@ void LexicalScopes::extractLexicalScopes(
}
}
-LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) {
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
- auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
- return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
-}
-
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
/// given DebugLoc. Return NULL if not found.
LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
@@ -168,11 +160,10 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
LexicalScope *Parent = nullptr;
if (D.isLexicalBlock())
Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
- // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
- // compatibility is no longer required.
- I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope),
- std::make_tuple(Parent, DIDescriptor(Scope),
- nullptr, false)).first;
+ I = LexicalScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, DIDescriptor(Scope),
+ nullptr, false)).first;
if (!Parent) {
assert(DIDescriptor(Scope).isSubprogram());
@@ -199,12 +190,11 @@ LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode,
else
Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt);
- // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
- // compatibility is no longer required.
I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
- std::make_tuple(P),
- std::make_tuple(Parent, Scope, InlinedAt,
- false)).first;
+ std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope,
+ InlinedAt, false))
+ .first;
return &I->second;
}
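The switch from make_tuple to forward_as_tuple matters because forward_as_tuple forwards references instead of copying the arguments into a temporary tuple; piecewise construction then builds the mapped value in place. A self-contained standard-library illustration:

#include <map>
#include <string>
#include <tuple>

int main() {
  std::map<int, std::string> M;
  // The mapped std::string is constructed in place from (3, 'x');
  // no temporary pair or tuple of copies is created.
  M.emplace(std::piecewise_construct, std::forward_as_tuple(1),
            std::forward_as_tuple(3, 'x')); // M[1] == "xxx"
  return 0;
}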
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 1624851..dc936a3 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -40,7 +40,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-
#include <memory>
using namespace llvm;
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 7e3b361..9748329 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -22,8 +22,8 @@
#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DebugInfo.h"
namespace llvm {
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ddb0032..fd7516d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -26,11 +26,278 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
+//===----------------------------------------------------------------------===//
+// Implementation of various methods necessary for calculation of live ranges.
+// The implementation of the methods abstracts from the concrete type of the
+// segment collection.
+//
+// Implementation of the class follows the Template design pattern. The base
+// class contains generic algorithms that call collection-specific methods,
+// which are provided in concrete subclasses. In order to avoid virtual calls
+// these methods are provided by means of C++ template instantiation.
+// The base class calls the methods of the subclass through method impl(),
+// which casts 'this' pointer to the type of the subclass.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename ImplT, typename IteratorT, typename CollectionT>
+class CalcLiveRangeUtilBase {
+protected:
+ LiveRange *LR;
+
+protected:
+ CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
+
+public:
+ typedef LiveRange::Segment Segment;
+ typedef IteratorT iterator;
+
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+
+ iterator I = impl().find(Def);
+ if (I == segments().end()) {
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ Segment *S = segmentAt(I);
+ if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert(S->valno->def == S->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, S->start);
+ if (Def != S->start)
+ S->start = S->valno->def = Def;
+ return S->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return nullptr;
+ iterator I =
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ if (I == segments().begin())
+ return nullptr;
+ --I;
+ if (I->end <= StartIdx)
+ return nullptr;
+ if (I->end < Use)
+ extendSegmentEndTo(I, Use);
+ return I->valno;
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to end at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap
+ /// with. The iterator is not invalidated.
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = std::next(I);
+ for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo)
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+
+ // If NewEnd was in the middle of a segment, make sure to get its endpoint.
+ S->end = std::max(NewEnd, std::prev(MergeTo)->end);
+
+ // If the newly formed segment now touches the segment after it and if they
+ // have the same value number, merge the two segments into one segment.
+ if (MergeTo != segments().end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ S->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead segments.
+ segments().erase(std::next(I), MergeTo);
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to start at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap with.
+ iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = I;
+ do {
+ if (MergeTo == segments().begin()) {
+ S->start = NewStart;
+ segments().erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another segment, just delete a range and
+ // extend that segment.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ segmentAt(MergeTo)->end = S->end;
+ } else {
+ // Otherwise, extend the segment right after.
+ ++MergeTo;
+ Segment *MergeToSeg = segmentAt(MergeTo);
+ MergeToSeg->start = NewStart;
+ MergeToSeg->end = S->end;
+ }
+
+ segments().erase(std::next(MergeTo), std::next(I));
+ return MergeTo;
+ }
+
+ iterator addSegment(Segment S) {
+ SlotIndex Start = S.start, End = S.end;
+ iterator I = impl().findInsertPos(S);
+
+ // If the inserted segment starts in the middle or right at the end of
+ // another segment, just extend that segment to contain the segment of S.
+ if (I != segments().begin()) {
+ iterator B = std::prev(I);
+ if (S.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendSegmentEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two segments with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this segment ends in the middle of, or right next
+ // to, another segment, merge it into that segment.
+ if (I != segments().end()) {
+ if (S.valno == I->valno) {
+ if (I->start <= End) {
+ I = extendSegmentStartTo(I, Start);
+
+ // If S is a complete superset of a segment, we may need to grow its
+ // endpoint as well.
+ if (End > I->end)
+ extendSegmentEndTo(I, End);
+ return I;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(I->start >= End &&
+ "Cannot overlap two segments with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new segment that doesn't interact with
+ // anything. Insert it.
+ return segments().insert(I, S);
+ }
+
+private:
+ ImplT &impl() { return *static_cast<ImplT *>(this); }
+
+ CollectionT &segments() { return impl().segmentsColl(); }
+
+ Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment vector.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilVector;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+ LiveRange::Segments> CalcLiveRangeUtilVectorBase;
+
+class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
+public:
+ CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilVectorBase;
+
+ LiveRange::Segments &segmentsColl() { return LR->segments; }
+
+ void insertAtEnd(const Segment &S) { LR->segments.push_back(S); }
+
+ iterator find(SlotIndex Pos) { return LR->find(Pos); }
+
+ iterator findInsertPos(Segment S) {
+ return std::upper_bound(LR->begin(), LR->end(), S.start);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment set.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilSet;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet,
+ LiveRange::SegmentSet::iterator,
+ LiveRange::SegmentSet> CalcLiveRangeUtilSetBase;
+
+class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
+public:
+ CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilSetBase;
+
+ LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; }
+
+ void insertAtEnd(const Segment &S) {
+ LR->segmentSet->insert(LR->segmentSet->end(), S);
+ }
+
+ iterator find(SlotIndex Pos) {
+ iterator I =
+ LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr));
+ if (I == LR->segmentSet->begin())
+ return I;
+ iterator PrevI = std::prev(I);
+ if (Pos < (*PrevI).end)
+ return PrevI;
+ return I;
+ }
+
+ iterator findInsertPos(Segment S) {
+ iterator I = LR->segmentSet->upper_bound(S);
+ if (I != LR->segmentSet->end() && !(S.start < *I))
+ ++I;
+ return I;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// LiveRange methods
+//===----------------------------------------------------------------------===//
+
LiveRange::iterator LiveRange::find(SlotIndex Pos) {
// This algorithm is basically std::upper_bound.
// Unfortunately, std::upper_bound cannot be used with mixed types until we
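
The comment block introducing CalcLiveRangeUtilBase above describes static dispatch via the Curiously Recurring Template Pattern: the base class holds the generic algorithms and reaches the collection-specific hooks through impl() without any virtual calls. A stripped-down sketch of the same structure (the names are illustrative, not from LLVM):

#include <iostream>

// Base provides the generic algorithm; ImplT supplies the storage-specific
// hooks. impl() downcasts without virtual dispatch (CRTP).
template <typename ImplT> class CalcBase {
public:
  void run() { std::cout << "running on " << impl().name() << '\n'; }

private:
  ImplT &impl() { return *static_cast<ImplT *>(this); }
};

class VectorCalc : public CalcBase<VectorCalc> {
  friend CalcBase<VectorCalc>;
  const char *name() { return "segment vector"; }
};

class SetCalc : public CalcBase<SetCalc> {
  friend CalcBase<SetCalc>;
  const char *name() { return "segment set"; }
};

int main() {
  VectorCalc().run();
  SetCalc().run();
  return 0;
}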
@@ -51,30 +318,11 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) {
VNInfo *LiveRange::createDeadDef(SlotIndex Def,
VNInfo::Allocator &VNInfoAllocator) {
- assert(!Def.isDead() && "Cannot define a value at the dead slot");
- iterator I = find(Def);
- if (I == end()) {
- VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- segments.push_back(Segment(Def, Def.getDeadSlot(), VNI));
- return VNI;
- }
- if (SlotIndex::isSameInstr(Def, I->start)) {
- assert(I->valno->def == I->start && "Inconsistent existing value def");
-
- // It is possible to have both normal and early-clobber defs of the same
- // register on an instruction. It doesn't make a lot of sense, but it is
- // possible to specify in inline assembly.
- //
- // Just convert everything to early-clobber.
- Def = std::min(Def, I->start);
- if (Def != I->start)
- I->start = I->valno->def = Def;
- return I->valno;
- }
- assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
- VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI));
- return VNI;
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator);
}
// overlaps - Return true if the intersection of the two live ranges is
@@ -185,6 +433,27 @@ bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
return I != begin() && (--I)->end > Start;
}
+bool LiveRange::covers(const LiveRange &Other) const {
+ if (empty())
+ return Other.empty();
+
+ const_iterator I = begin();
+ for (const Segment &O : Other.segments) {
+ I = advanceTo(I, O.start);
+ if (I == end() || I->start > O.start)
+ return false;
+
+ // Check adjacent live segments and see if we can get behind O.end.
+ while (I->end < O.end) {
+ const_iterator Last = I;
+ // Get next segment and abort if it was not adjacent.
+ ++I;
+ if (I == end() || Last->end != I->start)
+ return false;
+ }
+ }
+ return true;
+}
/// ValNo is dead, remove it. If it is the largest value number, just nuke it
/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
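
The new LiveRange::covers() above allows the covered range to be stitched together from several touching segments rather than requiring a single enclosing segment. A standalone restatement over plain integer intervals, assuming half-open [start, end) segments that are sorted and non-overlapping as in a LiveRange:

#include <cstddef>
#include <vector>

struct Seg { int start, end; }; // half-open [start, end)

// Every segment of Other must be covered by a run of touching segments in
// This; any gap inside the run means no coverage.
static bool covers(const std::vector<Seg> &This, const std::vector<Seg> &Other) {
  std::size_t I = 0;
  for (const Seg &O : Other) {
    while (I != This.size() && This[I].end <= O.start) // advanceTo
      ++I;
    if (I == This.size() || This[I].start > O.start)
      return false;
    std::size_t J = I;
    while (This[J].end < O.end) {
      if (J + 1 == This.size() || This[J].end != This[J + 1].start)
        return false; // gap: segments are not adjacent
      ++J;
    }
  }
  return true;
}

int main() {
  std::vector<Seg> Main = {{0, 4}, {4, 8}};
  std::vector<Seg> Sub = {{1, 6}};
  return covers(Main, Sub) ? 0 : 1; // covered via two touching segments
}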
@@ -204,8 +473,8 @@ void LiveRange::markValNoForDeletion(VNInfo *ValNo) {
void LiveRange::RenumberValues() {
SmallPtrSet<VNInfo*, 8> Seen;
valnos.clear();
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- VNInfo *VNI = I->valno;
+ for (const Segment &S : segments) {
+ VNInfo *VNI = S.valno;
if (!Seen.insert(VNI).second)
continue;
assert(!VNI->isUnused() && "Unused valno used by live segment");
@@ -214,133 +483,35 @@ void LiveRange::RenumberValues() {
}
}
-/// This method is used when we want to extend the segment specified by I to end
-/// at the specified endpoint. To do this, we should merge and eliminate all
-/// segments that this will overlap with. The iterator is not invalidated.
-void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
- assert(I != end() && "Not a valid segment!");
- VNInfo *ValNo = I->valno;
-
- // Search for the first segment that we can't merge with.
- iterator MergeTo = std::next(I);
- for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) {
- assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
- }
-
- // If NewEnd was in the middle of a segment, make sure to get its endpoint.
- I->end = std::max(NewEnd, std::prev(MergeTo)->end);
-
- // If the newly formed segment now touches the segment after it and if they
- // have the same value number, merge the two segments into one segment.
- if (MergeTo != end() && MergeTo->start <= I->end &&
- MergeTo->valno == ValNo) {
- I->end = MergeTo->end;
- ++MergeTo;
- }
-
- // Erase any dead segments.
- segments.erase(std::next(I), MergeTo);
+void LiveRange::addSegmentToSet(Segment S) {
+ CalcLiveRangeUtilSet(this).addSegment(S);
}
-
-/// This method is used when we want to extend the segment specified by I to
-/// start at the specified endpoint. To do this, we should merge and eliminate
-/// all segments that this will overlap with.
-LiveRange::iterator
-LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) {
- assert(I != end() && "Not a valid segment!");
- VNInfo *ValNo = I->valno;
-
- // Search for the first segment that we can't merge with.
- iterator MergeTo = I;
- do {
- if (MergeTo == begin()) {
- I->start = NewStart;
- segments.erase(MergeTo, I);
- return I;
- }
- assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
- --MergeTo;
- } while (NewStart <= MergeTo->start);
-
- // If we start in the middle of another segment, just delete a range and
- // extend that segment.
- if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
- MergeTo->end = I->end;
- } else {
- // Otherwise, extend the segment right after.
- ++MergeTo;
- MergeTo->start = NewStart;
- MergeTo->end = I->end;
+LiveRange::iterator LiveRange::addSegment(Segment S) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr) {
+ addSegmentToSet(S);
+ return end();
}
-
- segments.erase(std::next(MergeTo), std::next(I));
- return MergeTo;
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).addSegment(S);
}
-LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) {
- SlotIndex Start = S.start, End = S.end;
- iterator it = std::upper_bound(From, end(), Start);
-
- // If the inserted segment starts in the middle or right at the end of
- // another segment, just extend that segment to contain the segment of S.
- if (it != begin()) {
- iterator B = std::prev(it);
- if (S.valno == B->valno) {
- if (B->start <= Start && B->end >= Start) {
- extendSegmentEndTo(B, End);
- return B;
- }
- } else {
- // Check to make sure that we are not overlapping two live segments with
- // different valno's.
- assert(B->end <= Start &&
- "Cannot overlap two segments with differing ValID's"
- " (did you def the same reg twice in a MachineInstr?)");
- }
- }
-
- // Otherwise, if this segment ends in the middle of, or right next to, another
- // segment, merge it into that segment.
- if (it != end()) {
- if (S.valno == it->valno) {
- if (it->start <= End) {
- it = extendSegmentStartTo(it, Start);
-
- // If S is a complete superset of a segment, we may need to grow its
- // endpoint as well.
- if (End > it->end)
- extendSegmentEndTo(it, End);
- return it;
- }
- } else {
- // Check to make sure that we are not overlapping two live segments with
- // different valno's.
- assert(it->start >= End &&
- "Cannot overlap two segments with differing ValID's");
- }
- }
-
- // Otherwise, this is just a new segment that doesn't interact with anything.
- // Insert it.
- return segments.insert(it, S);
+void LiveRange::append(const Segment S) {
+ // Check that the segment belongs to the back of the list.
+ assert(segments.empty() || segments.back().end <= S.start);
+ segments.push_back(S);
}
/// extendInBlock - If this range is live before Kill in the basic
/// block that starts at StartIdx, extend it to be live up to Kill and return
/// the value. If there is no live range before Kill, return NULL.
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
- if (empty())
- return nullptr;
- iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
- if (I == begin())
- return nullptr;
- --I;
- if (I->end <= StartIdx)
- return nullptr;
- if (I->end < Kill)
- extendSegmentEndTo(I, Kill);
- return I->valno;
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill);
}
/// Remove the specified segment from this range. Note that the segment must
@@ -461,8 +632,8 @@ void LiveRange::join(LiveRange &Other,
// This can leave Other in an invalid state because we're not coalescing
// touching segments that now have identical values. That's OK since Other is
// not supposed to be valid after calling join();
- for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
- I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
+ for (Segment &S : Other.segments)
+ S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]];
// Update val# info. Renumber them and make sure they all belong to this
// LiveRange now. Also remove dead val#'s.
@@ -482,8 +653,8 @@ void LiveRange::join(LiveRange &Other,
// Okay, now insert the RHS live segments into the LHS.
LiveRangeUpdater Updater(this);
- for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
- Updater.add(*I);
+ for (Segment &S : Other.segments)
+ Updater.add(S);
}
/// Merge all of the segments in RHS into this live range as the specified
@@ -493,8 +664,8 @@ void LiveRange::join(LiveRange &Other,
void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
VNInfo *LHSValNo) {
LiveRangeUpdater Updater(this);
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
- Updater.add(I->start, I->end, LHSValNo);
+ for (const Segment &S : RHS.segments)
+ Updater.add(S.start, S.end, LHSValNo);
}
/// MergeValueInAsValue - Merge all of the live segments of a specific val#
@@ -506,9 +677,9 @@ void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
const VNInfo *RHSValNo,
VNInfo *LHSValNo) {
LiveRangeUpdater Updater(this);
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
- if (I->valno == RHSValNo)
- Updater.add(I->start, I->end, LHSValNo);
+ for (const Segment &S : RHS.segments)
+ if (S.valno == RHSValNo)
+ Updater.add(S.start, S.end, LHSValNo);
}
/// MergeValueNumberInto - This method is called when two value numbers
@@ -570,10 +741,258 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
return V2;
}
+void LiveRange::flushSegmentSet() {
+ assert(segmentSet != nullptr && "segment set must have been created");
+ assert(
+ segments.empty() &&
+ "segment set can be used only initially before switching to the array");
+ segments.append(segmentSet->begin(), segmentSet->end());
+ delete segmentSet;
+ segmentSet = nullptr;
+ verify();
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+ S->~SubRange();
+ // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
+void LiveInterval::removeEmptySubRanges() {
+ SubRange **NextPtr = &SubRanges;
+ SubRange *I = *NextPtr;
+ while (I != nullptr) {
+ if (!I->empty()) {
+ NextPtr = &I->Next;
+ I = *NextPtr;
+ continue;
+ }
+ // Free empty subranges until we find the next nonempty one.
+ do {
+ SubRange *Next = I->Next;
+ freeSubRange(I);
+ I = Next;
+ } while (I != nullptr && I->empty());
+ *NextPtr = I;
+ }
+}
+
+void LiveInterval::clearSubRanges() {
+ for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+ Next = I->Next;
+ freeSubRange(I);
+ }
+ SubRanges = nullptr;
+}
+
+/// Helper function for constructMainRangeFromSubranges(): Search the CFG
+/// backwards until we find a place covered by a LiveRange segment that actually
+/// has a valno set.
+static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
+ const MachineBasicBlock *MBB,
+ SmallPtrSetImpl<const MachineBasicBlock*> &Visited) {
+ // We start the search at the end of MBB.
+ SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB);
+ // In our use case we can't leave the area covered by the live segments
+ // without finding an actual VNI def.
+ LiveRange::iterator I = LR.find(EndIdx.getPrevSlot());
+ assert(I != LR.end());
+ LiveRange::Segment &S = *I;
+ if (S.valno != nullptr)
+ return S.valno;
+
+ VNInfo *VNI = nullptr;
+ // Continue at predecessors (we could even go to idom with domtree available).
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ // Avoid going in circles.
+ if (!Visited.insert(Pred).second)
+ continue;
+
+ VNI = searchForVNI(Indexes, LR, Pred, Visited);
+ if (VNI != nullptr) {
+ S.valno = VNI;
+ break;
+ }
+ }
+
+ return VNI;
+}
+
+static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
+ SmallPtrSet<const MachineBasicBlock*, 5> Visited;
+ for (LiveRange::Segment &S : LI.segments) {
+ if (S.valno != nullptr)
+ continue;
+ // This can only happen at the beginning of a basic block.
+ assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+ Visited.clear();
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+ if (VNI != nullptr) {
+ S.valno = VNI;
+ break;
+ }
+ }
+ assert(S.valno != nullptr && "could not determine valno");
+ }
+}
+
+void LiveInterval::constructMainRangeFromSubranges(
+ const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) {
+ // The basic observations on which this algorithm is based:
+ // - Each Def/ValNo in a subrange must have a corresponding def on the main
+ //   range, but no further defs/valnos are necessary.
+ // - If any of the subranges is live at a point, the main live range has to
+ //   be live too; conversely, if no subrange is live, the main range must not
+ //   be live either.
+ // We do this by scanning through all the subranges simultaneously, creating
+ // new segments in the main range as segment starts/ends come up in the
+ // subranges.
+ assert(hasSubRanges() && "expected subranges to be present");
+ assert(segments.empty() && valnos.empty() && "expected empty main range");
+
+ // Collect subrange, iterator pairs for the walk and determine first and last
+ // SlotIndex involved.
+ SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs;
+ SlotIndex First;
+ SlotIndex Last;
+ for (const SubRange &SR : subranges()) {
+ if (SR.empty())
+ continue;
+ SRs.push_back(std::make_pair(&SR, SR.begin()));
+ if (!First.isValid() || SR.segments.front().start < First)
+ First = SR.segments.front().start;
+ if (!Last.isValid() || SR.segments.back().end > Last)
+ Last = SR.segments.back().end;
+ }
+
+ // Walk over all subranges simultaneously.
+ Segment CurrentSegment;
+ bool ConstructingSegment = false;
+ bool NeedVNIFixup = false;
+ unsigned ActiveMask = 0;
+ SlotIndex Pos = First;
+ while (true) {
+ SlotIndex NextPos = Last;
+ enum {
+ NOTHING,
+ BEGIN_SEGMENT,
+ END_SEGMENT,
+ } Event = NOTHING;
+ // Which subregister lanes are affected by the current event.
+ unsigned EventMask = 0;
+ // Whether a BEGIN_SEGMENT is also a valno definition point.
+ bool IsDef = false;
+ // Find the next begin or end of a subrange segment. Combine masks if we
+ // have multiple begins/ends at the same position. Ends take precedence over
+ // Begins.
+ for (auto &SRP : SRs) {
+ const SubRange &SR = *SRP.first;
+ const_iterator &I = SRP.second;
+ // Advance iterator of subrange to a segment involving Pos; the earlier
+ // segments are already merged at this point.
+ while (I != SR.end() &&
+ (I->end < Pos ||
+ (I->end == Pos && (ActiveMask & SR.LaneMask) == 0)))
+ ++I;
+ if (I == SR.end())
+ continue;
+ if ((ActiveMask & SR.LaneMask) == 0 &&
+ Pos <= I->start && I->start <= NextPos) {
+ // Merge multiple begins at the same position.
+ if (I->start == NextPos && Event == BEGIN_SEGMENT) {
+ EventMask |= SR.LaneMask;
+ IsDef |= I->valno->def == I->start;
+ } else if (I->start < NextPos || Event != END_SEGMENT) {
+ Event = BEGIN_SEGMENT;
+ NextPos = I->start;
+ EventMask = SR.LaneMask;
+ IsDef = I->valno->def == I->start;
+ }
+ }
+ if ((ActiveMask & SR.LaneMask) != 0 &&
+ Pos <= I->end && I->end <= NextPos) {
+ // Merge multiple ends at the same position.
+ if (I->end == NextPos && Event == END_SEGMENT)
+ EventMask |= SR.LaneMask;
+ else {
+ Event = END_SEGMENT;
+ NextPos = I->end;
+ EventMask = SR.LaneMask;
+ }
+ }
+ }
+
+ // Advance scan position.
+ Pos = NextPos;
+ if (Event == BEGIN_SEGMENT) {
+ if (ConstructingSegment && IsDef) {
+ // Finish previous segment because we have to start a new one.
+ CurrentSegment.end = Pos;
+ append(CurrentSegment);
+ ConstructingSegment = false;
+ }
+
+ // Start a new segment if necessary.
+ if (!ConstructingSegment) {
+ // Determine value number for the segment.
+ VNInfo *VNI = nullptr;
+ if (IsDef) {
+ VNI = getNextValue(Pos, VNIAllocator);
+ } else {
+ // We have to reuse an existing value number, if we are lucky
+ // then we already passed one of the predecessor blocks and determined
+ // its value number (with blocks in reverse postorder this would be
+ // always true but we have no such guarantee).
+ assert(Pos.isBlock());
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos);
+ // See if any of the predecessor blocks has a lower number and a VNI
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
+ VNI = getVNInfoBefore(PredEnd);
+ if (VNI != nullptr)
+ break;
+ }
+ // Def will come later: We have to do an extra fixup pass.
+ if (VNI == nullptr)
+ NeedVNIFixup = true;
+ }
+
+ CurrentSegment.start = Pos;
+ CurrentSegment.valno = VNI;
+ ConstructingSegment = true;
+ }
+ ActiveMask |= EventMask;
+ } else if (Event == END_SEGMENT) {
+ assert(ConstructingSegment);
+ // Finish segment if no lane is active anymore.
+ ActiveMask &= ~EventMask;
+ if (ActiveMask == 0) {
+ CurrentSegment.end = Pos;
+ append(CurrentSegment);
+ ConstructingSegment = false;
+ }
+ } else {
+ // We reached the end of the last subranges and can stop.
+ assert(Event == NOTHING);
+ break;
+ }
+ }
+
+ // We might not be able to assign new valnos for all segments if the basic
+ // block containing the definition comes after a segment using the valno.
+ // Do a fixup pass for this uncommon case.
+ if (NeedVNIFixup)
+ determineMissingVNIs(Indexes, *this);
+
+ assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended");
+ verify();
+}
+
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
- for (const_iterator I = begin(), E = end(); I != E; ++I)
- Sum += I->start.distance(I->end);
+ for (const Segment &S : segments)
+ Sum += S.start.distance(S.end);
return Sum;
}
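
constructMainRangeFromSubranges() above sweeps all subranges at once, opening a main-range segment when the first lane becomes live and closing it when the last lane dies. The following deliberately simplified sketch shows only that sweep over plain integer segments; it ignores value numbers, def points, and the VNI fixup pass, and assumes disjoint lane masks per subrange and non-touching segments within one subrange:

#include <algorithm>
#include <cstdio>
#include <vector>

// One subrange segment: half-open [Start, End) plus the lanes it covers.
struct SubSegment {
  int Start, End;
  unsigned LaneMask;
};

int main() {
  // Two subranges with disjoint lane masks 0x1 and 0x2.
  std::vector<SubSegment> Segs = {{0, 4, 0x1}, {2, 8, 0x2}, {10, 12, 0x2}};

  struct Event { int Pos; bool IsEnd; unsigned Mask; };
  std::vector<Event> Events;
  for (const SubSegment &S : Segs) {
    Events.push_back({S.Start, false, S.LaneMask});
    Events.push_back({S.End, true, S.LaneMask});
  }
  // Process begins before ends at equal positions so touching segments of
  // different lanes merge into a single main-range segment.
  std::sort(Events.begin(), Events.end(), [](const Event &A, const Event &B) {
    return A.Pos != B.Pos ? A.Pos < B.Pos : A.IsEnd < B.IsEnd;
  });

  unsigned Active = 0; // union of the lane masks currently live
  int SegStart = 0;
  for (const Event &E : Events) {
    if (!E.IsEnd) {
      if (Active == 0) // first lane becomes live: open a main-range segment
        SegStart = E.Pos;
      Active |= E.Mask;
    } else {
      Active &= ~E.Mask;
      if (Active == 0) // last lane died: close the segment
        std::printf("main range segment [%d, %d)\n", SegStart, E.Pos);
    }
  }
  return 0;
}

On the example data this prints the merged segments [0, 8) and [10, 12).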
@@ -591,9 +1010,9 @@ void LiveRange::print(raw_ostream &OS) const {
if (empty())
OS << "EMPTY";
else {
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- OS << *I;
- assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ for (const Segment &S : segments) {
+ OS << S;
+ assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo");
}
}
@@ -620,6 +1039,10 @@ void LiveRange::print(raw_ostream &OS) const {
void LiveInterval::print(raw_ostream &OS) const {
OS << PrintReg(reg) << ' ';
super::print(OS);
+ // Print subranges
+ for (const SubRange &SR : subranges()) {
+ OS << format(" L%04X ", SR.LaneMask) << SR;
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
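
For readers unfamiliar with the L%04X notation used in the print() hunk above: each subrange carries a lane mask, a bitmask identifying which subregister lanes it covers, and it is rendered as a four-digit hex field. A minimal illustration of just the formatting (plain printf, not the LLVM API):

#include <cstdio>
int main() { std::printf(" L%04X ", 0x2u); } // prints " L0002 "

So a subrange covering lane 0x2 shows up as L0002 in an interval dump.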
@@ -648,6 +1071,26 @@ void LiveRange::verify() const {
}
}
}
+
+void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
+ super::verify();
+
+ // Make sure SubRanges are fine and LaneMasks are disjoint.
+ unsigned Mask = 0;
+ unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ for (const SubRange &SR : subranges()) {
+ // The subrange lane mask should be disjoint from any previous subrange masks.
+ assert((Mask & SR.LaneMask) == 0);
+ Mask |= SR.LaneMask;
+
+ // The subrange mask should be contained in the maximum lane mask for the vreg.
+ assert((Mask & ~MaxMask) == 0);
+
+ SR.verify();
+ // The main live range should cover the subrange.
+ assert(covers(SR));
+ }
+}
#endif
@@ -692,14 +1135,14 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
OS << " updater with gap = " << (ReadI - WriteI)
<< ", last start = " << LastStart
<< ":\n Area 1:";
- for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I)
- OS << ' ' << *I;
+ for (const auto &S : make_range(LR->begin(), WriteI))
+ OS << ' ' << S;
OS << "\n Spills:";
for (unsigned I = 0, E = Spills.size(); I != E; ++I)
OS << ' ' << Spills[I];
OS << "\n Area 2:";
- for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I)
- OS << ' ' << *I;
+ for (const auto &S : make_range(ReadI, LR->end()))
+ OS << ' ' << S;
OS << '\n';
}
@@ -723,6 +1166,13 @@ static inline bool coalescable(const LiveRange::Segment &A,
void LiveRangeUpdater::add(LiveRange::Segment Seg) {
assert(LR && "Cannot add to a null destination");
+ // Fall back to the regular add method if the live range
+ // is using the segment set instead of the segment vector.
+ if (LR->segmentSet != nullptr) {
+ LR->addSegmentToSet(Seg);
+ return;
+ }
+
// Flush the state if Start moves backwards.
if (!LastStart.isValid() || LastStart > Seg.start) {
if (isDirty())
@@ -860,9 +1310,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
const VNInfo *used = nullptr, *unused = nullptr;
// Determine connections.
- for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
- I != E; ++I) {
- const VNInfo *VNI = *I;
+ for (const VNInfo *VNI : LI->valnos) {
// Group all unused values into one class.
if (VNI->isUnused()) {
if (unused)
@@ -938,6 +1386,8 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
} else
*J++ = *I;
}
+ // TODO: do not cheat anymore by simply clearing all subranges
+ LI.clearSubRanges();
LI.segments.erase(J, E);
// Transfer VNInfos to their new owners and renumber them.
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 1742e63..cc08045 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -62,6 +63,17 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
+static cl::opt<bool> EnableSubRegLiveness(
+ "enable-subreg-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable subregister liveness tracking."));
+
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+ "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use segment set for the computation of the live ranges of physregs."));
+}
+
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
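
Both options above are defined with cl::opt, so even though they are cl::Hidden they can be toggled on the llc command line. A hypothetical invocation (the flag names come from the definitions above; the input file name is illustrative):

llc -enable-subreg-liveness=false -use-segment-set-for-physregs=false input.ll

Note that the check added to runOnMachineFunction() below only enables subregister liveness when the subtarget also reports enableSubRegLiveness(), so the flag can disable the feature but cannot force it on for targets that opt out.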
@@ -115,6 +127,10 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
+
+ if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
+ MRI->enableSubRegLiveness(true);
+
if (!LRCalc)
LRCalc = new LiveRangeCalc();
@@ -183,9 +199,8 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->createDeadDefs(LI);
- LRCalc->extendToUses(LI);
- computeDeadValues(&LI, LI, nullptr, nullptr);
+ LRCalc->calculate(LI);
+ computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -260,6 +275,10 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
LRCalc->extendToUses(LR, Reg);
}
}
+
+ // Flush the segment set to the segment vector.
+ if (UseSegmentSetForPhysRegs)
+ LR.flushSegmentSet();
}
@@ -292,7 +311,8 @@ void LiveIntervals::computeLiveInRegUnits() {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
- LR = RegUnitRanges[Unit] = new LiveRange();
+ // Use the segment set to speed up initial computation of the live range.
+ LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
NewRanges.push_back(Unit);
}
VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
@@ -312,6 +332,70 @@ void LiveIntervals::computeLiveInRegUnits() {
}
+static void createSegmentsForValues(LiveRange &LR,
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (auto VNI : VNIs) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI));
+ }
+}
+
+typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList;
+
+static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
+ ShrinkToUsesWorkList &WorkList,
+ const LiveRange &OldRange) {
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) {
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ (void)ExtVNI;
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+ !UsedPHIs.insert(VNI).second)
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (auto &Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (auto &Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+ assert(OldRange.getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+}
+
/// shrinkToUses - After removing some uses of a register, shrink its live
/// range to just the remaining uses. This method does not compute reaching
/// defs for new uses, and it doesn't remove dead defs.
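
extendSegmentsToUses() above is a classic backward dataflow worklist: pop a use point, try to extend the existing range inside its block, and if the value is live-in instead, record that and queue all predecessors. A toy model over numbered blocks (no SlotIndexes, segments, or PHI handling; all names illustrative):

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Minimal model: blocks with predecessor lists. A value is defined in
// DefBlock; extend liveness backwards from each use block to the def.
int main() {
  std::map<int, std::vector<int>> Preds = {{1, {0}}, {2, {1}}, {3, {1}}};
  int DefBlock = 0;
  std::vector<int> UseBlocks = {3};

  std::set<int> LiveIn; // blocks where the value is live-in
  std::vector<int> WorkList(UseBlocks);
  while (!WorkList.empty()) {
    int B = WorkList.back();
    WorkList.pop_back();
    if (B == DefBlock)
      continue; // reached the def: liveness starts here, stop walking
    if (!LiveIn.insert(B).second)
      continue; // already processed
    for (int P : Preds[B])
      WorkList.push_back(P); // the value must be live-out of all predecessors
  }
  for (int B : LiveIn)
    std::printf("value live-in to block %d\n", B);
  return 0;
}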
@@ -320,11 +404,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
DEBUG(dbgs() << "Shrink: " << *li << '\n');
assert(TargetRegisterInfo::isVirtualRegister(li->reg)
&& "Can only shrink virtual registers");
- // Find all the values used, including PHI kills.
- SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
- // Blocks that have already been added to WorkList as live-out.
- SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+ // Shrink subregister live ranges.
+ for (LiveInterval::SubRange &S : li->subranges()) {
+ shrinkToUses(S, li->reg);
+ }
+
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
// Visit all instructions reading li->reg.
for (MachineRegisterInfo::reg_instr_iterator
@@ -355,103 +442,126 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Create new live ranges with only minimal live segments per def.
LiveRange NewLR;
- for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
- I != E; ++I) {
- VNInfo *VNI = *I;
- if (VNI->isUnused())
- continue;
- NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI));
- }
+ createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+ extendSegmentsToUses(NewLR, *Indexes, WorkList, *li);
- // Keep track of the PHIs that are in use.
- SmallPtrSet<VNInfo*, 8> UsedPHIs;
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
- // Extend intervals to reach all uses in WorkList.
- while (!WorkList.empty()) {
- SlotIndex Idx = WorkList.back().first;
- VNInfo *VNI = WorkList.back().second;
- WorkList.pop_back();
- const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
- SlotIndex BlockStart = getMBBStartIdx(MBB);
+ // Handle dead values.
+ bool CanSeparate = computeDeadValues(*li, dead);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
- // Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) {
- (void)ExtVNI;
- assert(ExtVNI == VNI && "Unexpected existing value number");
- // Is this a PHIDef we haven't seen before?
- if (!VNI->isPHIDef() || VNI->def != BlockStart ||
- !UsedPHIs.insert(VNI).second)
- continue;
- // The PHI is live, make sure the predecessors are live-out.
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI).second)
- continue;
- SlotIndex Stop = getMBBEndIdx(*PI);
- // A predecessor is not required to have a live-out value for a PHI.
- if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
- WorkList.push_back(std::make_pair(Stop, PVNI));
+bool LiveIntervals::computeDeadValues(LiveInterval &LI,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ bool PHIRemoved = false;
+ for (auto VNI : LI.valnos) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LiveRange::iterator I = LI.FindSegmentContaining(Def);
+ assert(I != LI.end() && "Missing segment for VNI");
+
+ // Is the register live before? Otherwise we may have to add a read-undef
+ // flag for subregister defs.
+ if (MRI->tracksSubRegLiveness()) {
+ if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ MI->addRegisterDefReadUndef(LI.reg);
}
+ }
+
+ if (I->end != Def.getDeadSlot())
continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->markUnused();
+ LI.removeSegment(I);
+ DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+ PHIRemoved = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(LI.reg, TRI);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+ dead->push_back(MI);
+ }
}
+ }
+ return PHIRemoved;
+}
- // VNI is live-in to MBB.
- DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
+{
+ DEBUG(dbgs() << "Shrink: " << SR << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(Reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
- // Make sure VNI is live-out from the predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI).second)
+ // Visit all instructions reading Reg.
+ SlotIndex LastIdx;
+ for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+ MachineInstr *UseMI = MO.getParent();
+ if (UseMI->isDebugValue())
+ continue;
+ // Maybe the operand is for a subregister we don't care about.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((SubRegMask & SR.LaneMask) == 0)
continue;
- SlotIndex Stop = getMBBEndIdx(*PI);
- assert(li->getVNInfoBefore(Stop) == VNI &&
- "Wrong value out of predecessor");
- WorkList.push_back(std::make_pair(Stop, VNI));
}
+ // We only need to visit each instruction once.
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ if (Idx == LastIdx)
+ continue;
+ LastIdx = Idx;
+
+ LiveQueryResult LRQ = SR.Query(Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ // For subranges it is possible that only undef values are left in that
+ // part of the subregister, so there is no real live range at the use.
+ if (!VNI)
+ continue;
+
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
}
- // Handle dead values.
- bool CanSeparate = false;
- computeDeadValues(li, NewLR, &CanSeparate, dead);
+ // Create a new live range with only minimal live segments per def.
+ LiveRange NewLR;
+ createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+ extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
- // Move the trimmed segments back.
- li->segments.swap(NewLR.segments);
- DEBUG(dbgs() << "Shrunk: " << *li << '\n');
- return CanSeparate;
-}
+ // Move the trimmed ranges back.
+ SR.segments.swap(NewLR.segments);
-void LiveIntervals::computeDeadValues(LiveInterval *li,
- LiveRange &LR,
- bool *CanSeparate,
- SmallVectorImpl<MachineInstr*> *dead) {
- for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
- I != E; ++I) {
- VNInfo *VNI = *I;
+ // Remove dead PHI value numbers
+ for (auto VNI : SR.valnos) {
if (VNI->isUnused())
continue;
- LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def);
- assert(LRI != LR.end() && "Missing segment for PHI");
- if (LRI->end != VNI->def.getDeadSlot())
+ const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
VNI->markUnused();
- LR.removeSegment(LRI->start, LRI->end);
+ SR.removeSegment(*Segment);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
- if (CanSeparate)
- *CanSeparate = true;
- } else {
- // This is a dead def. Make sure the instruction knows.
- MachineInstr *MI = getInstructionFromIndex(VNI->def);
- assert(MI && "No instruction defining live value");
- MI->addRegisterDead(li->reg, TRI);
- if (dead && MI->allDefsAreDead()) {
- DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
- dead->push_back(MI);
- }
}
}
+
+ DEBUG(dbgs() << "Shrunk: " << SR << '\n');
}
void LiveIntervals::extendToIndices(LiveRange &LR,
@@ -462,26 +572,25 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
LRCalc->extend(LR, Indices[i]);
}
-void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints) {
- LiveQueryResult LRQ = LI->Query(Kill);
- VNInfo *VNI = LRQ.valueOut();
+ LiveQueryResult LRQ = LR.Query(Kill);
+ VNInfo *VNI = LRQ.valueOutOrDead();
if (!VNI)
return;
MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
- SlotIndex MBBStart, MBBEnd;
- std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB);
// If VNI isn't live out from KillMBB, the value is trivially pruned.
if (LRQ.endPoint() < MBBEnd) {
- LI->removeSegment(Kill, LRQ.endPoint());
+ LR.removeSegment(Kill, LRQ.endPoint());
if (EndPoints) EndPoints->push_back(LRQ.endPoint());
return;
}
// VNI is live out of KillMBB.
- LI->removeSegment(Kill, MBBEnd);
+ LR.removeSegment(Kill, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
// Find all blocks that are reachable from KillMBB without leaving VNI's live
@@ -498,8 +607,9 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
MachineBasicBlock *MBB = *I;
// Check if VNI is live in to MBB.
+ SlotIndex MBBStart, MBBEnd;
std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
- LiveQueryResult LRQ = LI->Query(MBBStart);
+ LiveQueryResult LRQ = LR.Query(MBBStart);
if (LRQ.valueIn() != VNI) {
// This block isn't part of the VNI segment. Prune the search.
I.skipChildren();
@@ -508,14 +618,14 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
// Prune the search if VNI is killed in MBB.
if (LRQ.endPoint() < MBBEnd) {
- LI->removeSegment(MBBStart, LRQ.endPoint());
+ LR.removeSegment(MBBStart, LRQ.endPoint());
if (EndPoints) EndPoints->push_back(LRQ.endPoint());
I.skipChildren();
continue;
}
// VNI is live through MBB.
- LI->removeSegment(MBBStart, MBBEnd);
+ LR.removeSegment(MBBStart, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
++I;
}
@@ -528,14 +638,17 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Keep track of regunit ranges.
- SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU;
+ SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
+ // Keep track of subregister ranges.
+ SmallVector<std::pair<const LiveInterval::SubRange*,
+ LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
- LiveInterval *LI = &getInterval(Reg);
- if (LI->empty())
+ const LiveInterval &LI = getInterval(Reg);
+ if (LI.empty())
continue;
// Find the regunit intervals for the assigned register. They may overlap
@@ -543,15 +656,22 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.clear();
for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
++Units) {
- LiveRange &RURanges = getRegUnit(*Units);
- if (RURanges.empty())
+ const LiveRange &RURange = getRegUnit(*Units);
+ if (RURange.empty())
continue;
- RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end)));
+ RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
+ }
+
+ if (MRI->tracksSubRegLiveness()) {
+ SRs.clear();
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
+ }
}
// Every instruction that kills Reg corresponds to a segment range end
// point.
- for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
++RI) {
// A block index indicates an MBB edge.
if (RI->end.isBlock())
@@ -568,23 +688,80 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// BAR %EAX<kill>
//
// There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
- bool CancelKill = false;
- for (unsigned u = 0, e = RU.size(); u != e; ++u) {
- LiveRange &RRanges = *RU[u].first;
- LiveRange::iterator &I = RU[u].second;
- if (I == RRanges.end())
+ for (auto &RUP : RU) {
+ const LiveRange &RURange = *RUP.first;
+ LiveRange::const_iterator &I = RUP.second;
+ if (I == RURange.end())
continue;
- I = RRanges.advanceTo(I, RI->end);
- if (I == RRanges.end() || I->start >= RI->end)
+ I = RURange.advanceTo(I, RI->end);
+ if (I == RURange.end() || I->start >= RI->end)
continue;
// I is overlapping RI.
- CancelKill = true;
- break;
+ goto CancelKill;
+ }
+
+ if (MRI->tracksSubRegLiveness()) {
+ // When reading a partially undefined value we must not add a kill flag.
+ // The regalloc might have used the undef lane for something else.
+ // Example:
+ // %vreg1 = ... ; R32: %vreg1
+ // %vreg2:high16 = ... ; R64: %vreg2
+ // = read %vreg2<kill> ; R64: %vreg2
+ // = read %vreg1 ; R32: %vreg1
+ // The <kill> flag is correct for %vreg2, but the register allocator may
+ // assign R0L to %vreg1, and R0 to %vreg2 because the low 32 bits of R0
+ // are actually never written by %vreg2. After assignment the <kill>
+ // flag at the read instruction is invalid.
+ unsigned DefinedLanesMask;
+ if (!SRs.empty()) {
+ // Compute a mask of lanes that are defined.
+ DefinedLanesMask = 0;
+ for (auto &SRP : SRs) {
+ const LiveInterval::SubRange &SR = *SRP.first;
+ LiveRange::const_iterator &I = SRP.second;
+ if (I == SR.end())
+ continue;
+ I = SR.advanceTo(I, RI->end);
+ if (I == SR.end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI
+ DefinedLanesMask |= SR.LaneMask;
+ }
+ } else
+ DefinedLanesMask = ~0u;
+
+ bool IsFullWrite = false;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse()) {
+ // Reading any undefined lanes?
+ unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ if ((UseMask & ~DefinedLanesMask) != 0)
+ goto CancelKill;
+ } else if (MO.getSubReg() == 0) {
+ // Writing to the full register?
+ assert(MO.isDef());
+ IsFullWrite = true;
+ }
+ }
+
+ // If an instruction writes to a subregister, a new segment starts in
+ // the LiveInterval. But as this only overwrites part of the register,
+ // adding kill flags is not correct here after registers have been
+ // assigned.
+ if (!IsFullWrite) {
+ // Next segment has to be adjacent in the subregister write case.
+ LiveRange::const_iterator N = std::next(RI);
+ if (N != LI.end() && N->start == RI->end)
+ goto CancelKill;
+ }
}
- if (CancelKill)
- MI->clearRegisterKills(Reg, nullptr);
- else
- MI->addRegisterKilled(Reg, nullptr);
+
+ MI->addRegisterKilled(Reg, nullptr);
+ continue;
+CancelKill:
+ MI->clearRegisterKills(Reg, nullptr);
}
}
}
@@ -615,9 +792,7 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
bool
LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I != E; ++I) {
- const VNInfo *PHI = *I;
+ for (const VNInfo *PHI : LI.valnos) {
if (PHI->isUnused() || !PHI->isPHIDef())
continue;
const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
@@ -767,7 +942,16 @@ public:
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
LiveInterval &LI = LIS.getInterval(Reg);
- updateRange(LI, Reg);
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO->getSubReg();
+ unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask) == 0)
+ continue;
+ updateRange(S, Reg, S.LaneMask);
+ }
+ }
+ updateRange(LI, Reg, 0);
continue;
}
@@ -775,7 +959,7 @@ public:
// precomputed live range.
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
if (LiveRange *LR = getRegUnitLI(*Units))
- updateRange(*LR, *Units);
+ updateRange(*LR, *Units, 0);
}
if (hasRegMask)
updateRegMaskSlots();
@@ -784,21 +968,24 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveRange &LR, unsigned Reg) {
+ void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
if (!Updated.insert(&LR).second)
return;
DEBUG({
dbgs() << " ";
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << PrintReg(Reg);
- else
+ if (LaneMask != 0)
+ dbgs() << format(" L%04X", LaneMask);
+ } else {
dbgs() << PrintRegUnit(Reg, &TRI);
+ }
dbgs() << ":\t" << LR << '\n';
});
if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
handleMoveDown(LR);
else
- handleMoveUp(LR, Reg);
+ handleMoveUp(LR, Reg, LaneMask);
DEBUG(dbgs() << " -->\t" << LR << '\n');
LR.verify();
}
@@ -911,7 +1098,7 @@ private:
/// Hoist kill to NewIdx, then scan for last kill between NewIdx and
/// OldIdx.
///
- void handleMoveUp(LiveRange &LR, unsigned Reg) {
+ void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
// First look for a kill at OldIdx.
LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
LiveRange::iterator E = LR.end();
@@ -932,7 +1119,7 @@ private:
if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
// No def, search for the new kill.
// This can never be an early clobber kill since there is no def.
- std::prev(I)->end = findLastUseBefore(Reg).getRegSlot();
+ std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot();
return;
}
}
@@ -988,15 +1175,17 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg) {
+ SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
SlotIndex LastUse = NewIdx;
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI.use_instr_nodbg_begin(Reg),
- UE = MRI.use_instr_nodbg_end();
- UI != UE; ++UI) {
- const MachineInstr* MI = &*UI;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0 && LaneMask != 0
+ && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
+ continue;
+
+ const MachineInstr *MI = MO.getParent();
SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
if (InstSlot > LastUse && InstSlot < OldIdx)
LastUse = InstSlot;
@@ -1062,6 +1251,94 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
HME.updateAllRanges(MI);
}
+void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
+ const MachineBasicBlock::iterator End,
+ const SlotIndex endIdx,
+ LiveRange &LR, const unsigned Reg,
+ const unsigned LaneMask) {
+ LiveInterval::iterator LII = LR.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LR.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((Mask & LaneMask) == 0)
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LR.begin())
+ prevStart = std::prev(LII)->start;
+
+ // FIXME: This could be more efficient if there was a
+ // removeSegment method that returned an iterator.
+ LR.removeSegment(*LII, true);
+ if (prevStart.isValid())
+ LII = LR.find(prevStart);
+ else
+ LII = LR.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(),
+ instrIdx.getDeadSlot(), VNI);
+ LII = LR.addSegment(S);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LR.addSegment(S);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+}
+
void
LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Begin,
@@ -1107,83 +1384,31 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!LI.hasAtLeastOneValue())
continue;
- LiveInterval::iterator LII = LI.find(endIdx);
- SlotIndex lastUseIdx;
- if (LII != LI.end() && LII->start < endIdx)
- lastUseIdx = LII->end;
- else
- --LII;
-
- for (MachineBasicBlock::iterator I = End; I != Begin;) {
- --I;
- MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
-
- SlotIndex instrIdx = getInstructionIndex(MI);
- bool isStartValid = getInstructionFromIndex(LII->start);
- bool isEndValid = getInstructionFromIndex(LII->end);
-
- // FIXME: This doesn't currently handle early-clobber or multiple removed
- // defs inside of the region to repair.
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- const MachineOperand &MO = *OI;
- if (!MO.isReg() || MO.getReg() != Reg)
- continue;
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
+ }
+ repairOldRegInRange(Begin, End, endIdx, LI, Reg);
+ }
+}
- if (MO.isDef()) {
- if (!isStartValid) {
- if (LII->end.isDead()) {
- SlotIndex prevStart;
- if (LII != LI.begin())
- prevStart = std::prev(LII)->start;
-
- // FIXME: This could be more efficient if there was a
- // removeSegment method that returned an iterator.
- LI.removeSegment(*LII, true);
- if (prevStart.isValid())
- LII = LI.find(prevStart);
- else
- LII = LI.begin();
- } else {
- LII->start = instrIdx.getRegSlot();
- LII->valno->def = instrIdx.getRegSlot();
- if (MO.getSubReg() && !MO.isUndef())
- lastUseIdx = instrIdx.getRegSlot();
- else
- lastUseIdx = SlotIndex();
- continue;
- }
- }
+void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (LiveRange *LR = getCachedRegUnit(*Units))
+ if (VNInfo *VNI = LR->getVNInfoAt(Pos))
+ LR->removeValNo(VNI);
+ }
+}
- if (!lastUseIdx.isValid()) {
- VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
- VNInfoAllocator);
- LiveRange::Segment S(instrIdx.getRegSlot(),
- instrIdx.getDeadSlot(), VNI);
- LII = LI.addSegment(S);
- } else if (LII->start != instrIdx.getRegSlot()) {
- VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
- VNInfoAllocator);
- LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI);
- LII = LI.addSegment(S);
- }
+void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ VNInfo *VNI = LI.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ return;
+ LI.removeValNo(VNI);
- if (MO.getSubReg() && !MO.isUndef())
- lastUseIdx = instrIdx.getRegSlot();
- else
- lastUseIdx = SlotIndex();
- } else if (MO.isUse()) {
- // FIXME: This should probably be handled outside of this branch,
- // either as part of the def case (for defs inside of the region) or
- // after the loop over the region.
- if (!isEndValid && !LII->end.isBlock())
- LII->end = instrIdx.getRegSlot();
- if (!lastUseIdx.isValid())
- lastUseIdx = instrIdx.getRegSlot();
- }
- }
- }
+ // Also remove the value in subranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (VNInfo *SVNI = S.getVNInfoAt(Pos))
+ S.removeValNo(SVNI);
}
+ LI.removeEmptySubRanges();
}
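A hedged sketch of the two new helpers in use, mirroring the LiveRangeEdit change further down. The wrapper function is invented for illustration; Reg and Idx are assumed to describe a def being deleted.

    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Hypothetical wrapper: remove the value defined at Idx, whichever kind
    // of register it lives in.
    static void removeDefAt(LiveIntervals &LIS, unsigned Reg, SlotIndex Idx) {
      if (TargetRegisterInfo::isPhysicalRegister(Reg))
        LIS.removePhysRegDefAt(Reg, Idx); // prune the VNI on each live regunit
      else
        // Prunes the VNI from the main range and from every subrange, then
        // drops subranges that became empty.
        LIS.removeVRegDefAt(LIS.getInterval(Reg), Idx);
    }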
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index d81221b..025d99c 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -26,14 +26,14 @@ using namespace llvm;
// Merge a LiveInterval's segments. Guarantee no overlaps.
-void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
- if (VirtReg.empty())
+void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
+ if (Range.empty())
return;
++Tag;
// Insert each of the virtual register's live segments into the map.
- LiveInterval::iterator RegPos = VirtReg.begin();
- LiveInterval::iterator RegEnd = VirtReg.end();
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
SegmentIter SegPos = Segments.find(RegPos->start);
while (SegPos.valid()) {
@@ -53,14 +53,14 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
}
// Remove a live virtual register's segments from this union.
-void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
- if (VirtReg.empty())
+void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
+ if (Range.empty())
return;
++Tag;
// Remove each of the virtual register's live segments from the map.
- LiveInterval::iterator RegPos = VirtReg.begin();
- LiveInterval::iterator RegEnd = VirtReg.end();
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
SegmentIter SegPos = Segments.find(RegPos->start);
for (;;) {
@@ -70,7 +70,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
return;
// Skip all segments that may have been coalesced.
- RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ RegPos = Range.advanceTo(RegPos, SegPos.start());
if (RegPos == RegEnd)
return;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index a558e14..d804b39 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -19,6 +19,13 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+void LiveRangeCalc::resetLiveOutMap() {
+ unsigned NumBlocks = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(NumBlocks);
+ Map.resize(NumBlocks);
+}
+
void LiveRangeCalc::reset(const MachineFunction *mf,
SlotIndexes *SI,
MachineDominatorTree *MDT,
@@ -28,126 +35,207 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
Indexes = SI;
DomTree = MDT;
Alloc = VNIA;
-
- unsigned N = MF->getNumBlockIDs();
- Seen.clear();
- Seen.resize(N);
- LiveOut.resize(N);
+ resetLiveOutMap();
LiveIn.clear();
}
-void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
+static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
+ LiveRange &LR, const MachineOperand &MO) {
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(DefIdx, Alloc);
+}
+
+void LiveRangeCalc::calculate(LiveInterval &LI) {
assert(MRI && Indexes && "call reset() first");
+ // Step 1: Create minimal live segments for every definition of Reg.
// Visit all def operands. If the same instruction has multiple defs of Reg,
- // LR.createDeadDef() will deduplicate.
- for (MachineOperand &MO : MRI->def_operands(Reg)) {
- const MachineInstr *MI = MO.getParent();
- // Find the corresponding slot index.
- SlotIndex Idx;
- if (MI->isPHI())
- // PHI defs begin at the basic block start index.
- Idx = Indexes->getMBBStartIdx(MI->getParent());
- else
- // Instructions are either normal 'r', or early clobber 'e'.
- Idx = Indexes->getInstructionIndex(MI)
- .getRegSlot(MO.isEarlyClobber());
+ // createDeadDef() will deduplicate.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = LI.reg;
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
- // Create the def in LR. This may find an existing def.
- LR.createDeadDef(Idx, *Alloc);
+ unsigned SubReg = MO.getSubReg();
+ if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+ unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+
+ // If this is the first time we see a subregister def, initialize
+ // subranges by creating a copy of the main range.
+ if (!LI.hasSubRanges() && !LI.empty()) {
+ unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+ }
+
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ // A Mask for subregs common to the existing subrange and current def.
+ unsigned Common = S.LaneMask & Mask;
+ if (Common == 0)
+ continue;
+ // A Mask for subregs covered by the subrange but not the current def.
+ unsigned LRest = S.LaneMask & ~Mask;
+ LiveInterval::SubRange *CommonRange;
+ if (LRest != 0) {
+ // Split current subrange into Common and LRest ranges.
+ S.LaneMask = LRest;
+ CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
+ } else {
+ assert(Common == S.LaneMask);
+ CommonRange = &S;
+ }
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
+ Mask &= ~Common;
+ }
+ // Create a new SubRange for subregs we did not cover yet.
+ if (Mask != 0) {
+ LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, *NewRange, MO);
+ }
+ }
+
+ // Create the def in the main liverange. We do not have to do this if
+ // subranges are tracked as we recreate the main range later in this case.
+ if (MO.isDef() && !LI.hasSubRanges())
+ createDeadDef(*Indexes, *Alloc, LI, MO);
+ }
+
+ // We may have created empty live ranges for partially undefined uses; we
+ // can't keep them because we won't find defs in them later.
+ LI.removeEmptySubRanges();
+
+ // Step 2: Extend live segments to all uses, constructing SSA form as
+ // necessary.
+ if (LI.hasSubRanges()) {
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ resetLiveOutMap();
+ extendToUses(S, Reg, S.LaneMask);
+ }
+ LI.clear();
+ LI.constructMainRangeFromSubranges(*Indexes, *Alloc);
+ } else {
+ resetLiveOutMap();
+ extendToUses(LI, Reg, ~0u);
}
}
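The Common/LRest/leftover-Mask handling in calculate() above is pure bit arithmetic on lane masks. A standalone sketch with invented masks, where 0x3 covers lanes {0,1} of the existing subrange and 0x6 covers lanes {1,2} written by the def:

    #include <cassert>
    #include <cstdio>

    int main() {
      unsigned SubRangeMask = 0x3; // lanes already covered by subrange S
      unsigned DefMask = 0x6;      // lanes written by the def operand
      unsigned Common = SubRangeMask & DefMask;  // 0x2: S is split off here...
      unsigned LRest = SubRangeMask & ~DefMask;  // 0x1: ...leaving this rest
      unsigned Leftover = DefMask & ~Common;     // 0x4: gets a new subrange
      assert(Common == 0x2 && LRest == 0x1 && Leftover == 0x4);
      std::printf("common=%#x rest=%#x new=%#x\n", Common, LRest, Leftover);
      return 0;
    }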
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) {
+void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
assert(MRI && Indexes && "call reset() first");
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LR.createDeadDef() will deduplicate.
+ for (MachineOperand &MO : MRI->def_operands(Reg))
+ createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
+
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
// Visit all operands that read Reg. This may include partial defs.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
// Clear all kill flags. They will be reinserted after register allocation
// by LiveIntervalAnalysis::addKillFlags().
if (MO.isUse())
MO.setIsKill(false);
+ else {
+ // We only care about uses, but on the main range (mask ~0u) this includes
+ // the "virtual" reads happening for subregister defs.
+ if (Mask != ~0u)
+ continue;
+ }
+
if (!MO.readsReg())
continue;
- // MI is reading Reg. We may have visited MI before if it happens to be
- // reading Reg multiple times. That is OK, extend() is idempotent.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ // Ignore uses not covering the current subrange.
+ if ((SubRegMask & Mask) == 0)
+ continue;
+ }
+
+ // Determine the actual place of the use.
const MachineInstr *MI = MO.getParent();
unsigned OpNo = (&MO - &MI->getOperand(0));
-
- // Find the SlotIndex being read.
- SlotIndex Idx;
+ SlotIndex UseIdx;
if (MI->isPHI()) {
assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
- // PHI operands are paired: (Reg, PredMBB).
- // Extend the live range to be live-out from PredMBB.
- Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
+ // The actual place where a phi operand is used is the end of the pred
+ // MBB. PHI operands are paired: (Reg, PredMBB).
+ UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
} else {
- // This is a normal instruction.
- Idx = Indexes->getInstructionIndex(MI).getRegSlot();
// Check for early-clobber redefs.
+ bool isEarlyClobber = false;
unsigned DefIdx;
- if (MO.isDef()) {
- if (MO.isEarlyClobber())
- Idx = Idx.getRegSlot(true);
- } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
+ if (MO.isDef())
+ isEarlyClobber = MO.isEarlyClobber();
+ else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
// FIXME: This would be a lot easier if tied early-clobber uses also
// had an early-clobber flag.
- if (MI->getOperand(DefIdx).isEarlyClobber())
- Idx = Idx.getRegSlot(true);
+ isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
}
+ UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber);
}
- extend(LR, Idx, Reg);
+
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ extend(LR, UseIdx, Reg);
}
}
-// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns() {
+void LiveRangeCalc::updateFromLiveIns() {
LiveRangeUpdater Updater;
- for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
- E = LiveIn.end(); I != E; ++I) {
- if (!I->DomNode)
+ for (const LiveInBlock &I : LiveIn) {
+ if (!I.DomNode)
continue;
- MachineBasicBlock *MBB = I->DomNode->getBlock();
- assert(I->Value && "No live-in value found");
+ MachineBasicBlock *MBB = I.DomNode->getBlock();
+ assert(I.Value && "No live-in value found");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
- if (I->Kill.isValid())
+ if (I.Kill.isValid())
// Value is killed inside this block.
- End = I->Kill;
+ End = I.Kill;
else {
// The value is live-through, update LiveOut as well.
// Defer the Domtree lookup until it is needed.
assert(Seen.test(MBB->getNumber()));
- LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr);
+ Map[MBB] = LiveOutPair(I.Value, nullptr);
}
- Updater.setDest(&I->LR);
- Updater.add(Start, End, I->Value);
+ Updater.setDest(&I.LR);
+ Updater.add(Start, End, I.Value);
}
LiveIn.clear();
}
-void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) {
- assert(Kill.isValid() && "Invalid SlotIndex");
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+ assert(Use.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
- MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
- assert(KillMBB && "No MBB at Kill");
+ MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot());
+ assert(UseMBB && "No MBB at Use");
// Is there a def in the same MBB we can extend?
- if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
return;
- // Find the single reaching def, or determine if Kill is jointly dominated by
+ // Find the single reaching def, or determine if Use is jointly dominated by
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- if (findReachingDefs(LR, *KillMBB, Kill, PhysReg))
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
return;
// When there were multiple different values, we may need new PHIs.
@@ -162,16 +250,16 @@ void LiveRangeCalc::calculateValues() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
updateSSA();
- updateLiveIns();
+ updateFromLiveIns();
}
-bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
- SlotIndex Kill, unsigned PhysReg) {
- unsigned KillMBBNum = KillMBB.getNumber();
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+ SlotIndex Use, unsigned PhysReg) {
+ unsigned UseMBBNum = UseMBB.getNumber();
// Block numbers where LR should be live-in.
- SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
+ SmallVector<unsigned, 16> WorkList(1, UseMBBNum);
// Remember if we have seen more than one value.
bool UniqueVNI = true;
@@ -202,7 +290,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
// Is this a known live-out block?
if (Seen.test(Pred->getNumber())) {
- if (VNInfo *VNI = LiveOut[Pred].first) {
+ if (VNInfo *VNI = Map[Pred].first) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
TheVNI = VNI;
@@ -225,11 +313,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
}
// No, we need a live-in value for Pred as well
- if (Pred != &KillMBB)
+ if (Pred != &UseMBB)
WorkList.push_back(Pred->getNumber());
else
- // Loopback to KillMBB, so value is really live through.
- Kill = SlotIndex();
+ // Loopback to UseMBB, so value is really live through.
+ Use = SlotIndex();
}
}
@@ -247,12 +335,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
E = WorkList.end(); I != E; ++I) {
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(*I);
- // Trim the live range in KillMBB.
- if (*I == KillMBBNum && Kill.isValid())
- End = Kill;
+ // Trim the live range in UseMBB.
+ if (*I == UseMBBNum && Use.isValid())
+ End = Use;
else
- LiveOut[MF->getBlockNumbered(*I)] =
- LiveOutPair(TheVNI, nullptr);
+ Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr);
Updater.add(Start, End, TheVNI);
}
return true;
@@ -265,8 +352,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
addLiveInBlock(LR, DomTree->getNode(MBB));
- if (MBB == &KillMBB)
- LiveIn.back().Kill = Kill;
+ if (MBB == &UseMBB)
+ LiveIn.back().Kill = Use;
}
return false;
@@ -285,9 +372,8 @@ void LiveRangeCalc::updateSSA() {
Changes = 0;
// Propagate live-out values down the dominator tree, inserting phi-defs
// when necessary.
- for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
- E = LiveIn.end(); I != E; ++I) {
- MachineDomTreeNode *Node = I->DomNode;
+ for (LiveInBlock &I : LiveIn) {
+ MachineDomTreeNode *Node = I.DomNode;
// Skip block if the live-in value has already been determined.
if (!Node)
continue;
@@ -303,16 +389,16 @@ void LiveRangeCalc::updateSSA() {
// immediate dominator. Check if any of them have live-out values that are
// properly dominated by IDom. If so, we need a phi-def here.
if (!needPHI) {
- IDomValue = LiveOut[IDom->getBlock()];
+ IDomValue = Map[IDom->getBlock()];
// Cache the DomTree node that defined the value.
if (IDomValue.first && !IDomValue.second)
- LiveOut[IDom->getBlock()].second = IDomValue.second =
+ Map[IDom->getBlock()].second = IDomValue.second =
DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair &Value = LiveOut[*PI];
+ LiveOutPair &Value = Map[*PI];
if (!Value.first || Value.first == IDomValue.first)
continue;
@@ -334,7 +420,7 @@ void LiveRangeCalc::updateSSA() {
// The value may be live-through even if Kill is set, as can happen when
// we are called from extend(). In that case Seen is true, and
// Map indicates a foreign or missing value.
- LiveOutPair &LOP = LiveOut[MBB];
+ LiveOutPair &LOP = Map[MBB];
// Create a phi-def if required.
if (needPHI) {
@@ -342,25 +428,25 @@ void LiveRangeCalc::updateSSA() {
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
- LiveRange &LR = I->LR;
+ LiveRange &LR = I.LR;
VNInfo *VNI = LR.getNextValue(Start, *Alloc);
- I->Value = VNI;
+ I.Value = VNI;
// This block is done, we know the final value.
- I->DomNode = nullptr;
+ I.DomNode = nullptr;
- // Add liveness since updateLiveIns now skips this node.
- if (I->Kill.isValid())
- LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI));
+ // Add liveness since updateFromLiveIns now skips this node.
+ if (I.Kill.isValid())
+ LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
else {
LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
} else if (IDomValue.first) {
// No phi-def here. Remember incoming value.
- I->Value = IDomValue.first;
+ I.Value = IDomValue.first;
// If the IDomValue is killed in the block, don't propagate through.
- if (I->Kill.isValid())
+ if (I.Kill.isValid())
continue;
// Propagate IDomValue if it isn't killed:
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 345d6c4..90bf971 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -40,12 +40,6 @@ class LiveRangeCalc {
MachineDominatorTree *DomTree;
VNInfo::Allocator *Alloc;
- /// Seen - Bit vector of active entries in LiveOut, also used as a visited
- /// set by findReachingDefs. One entry per basic block, indexed by block
- /// number. This is kept as a separate bit vector because it can be cleared
- /// quickly when switching live ranges.
- BitVector Seen;
-
/// LiveOutPair - A value and the block that defined it. The domtree node is
/// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
@@ -53,8 +47,14 @@ class LiveRangeCalc {
/// LiveOutMap - Map basic blocks to the value leaving the block.
typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
- /// LiveOut - Map each basic block where a live range is live out to the
- /// live-out value and its defining block.
+ /// Bit vector of active entries in the live-out map, also used as a visited
+ /// set by findReachingDefs. One entry per basic block, indexed by block
+ /// number. This is kept as a separate bit vector because it can be cleared
+ /// quickly when switching live ranges.
+ BitVector Seen;
+
+ /// Map each basic block where a live range is live out to the live-out value
+ /// and its defining block.
///
/// For every basic block, MBB, one of these conditions shall be true:
///
@@ -70,7 +70,7 @@ class LiveRangeCalc {
///
/// The map can be shared by multiple live ranges as long as no two are
/// live-out of the same block.
- LiveOutMap LiveOut;
+ LiveOutMap Map;
/// LiveInBlock - Information about a basic block where a live range is known
/// to be live-in, but the value has not yet been determined.
@@ -101,17 +101,17 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
- /// of defs that can reach it.
+ /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can
+ /// reach it.
///
- /// If only one def can reach Kill, all paths from the def to kill are added
- /// to LI, and the function returns true.
+ /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB
+ /// are added to @p LR, and the function returns true.
///
- /// If multiple values can reach Kill, the blocks that need LI to be live in
- /// are added to the LiveIn array, and the function returns false.
+ /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
+ /// live in are added to the LiveIn array, and the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
SlotIndex Kill, unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
@@ -121,8 +121,18 @@ class LiveRangeCalc {
/// blocks. No values are read from the live ranges.
void updateSSA();
- /// Add liveness as specified in the LiveIn vector.
- void updateLiveIns();
+ /// Transfer information from the LiveIn vector to the live ranges and update
+ /// the live-out map.
+ void updateFromLiveIns();
+
+ /// Extend the live range of @p LR to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask);
+
+ /// Reset Map and Seen fields.
+ void resetLiveOutMap();
public:
LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr),
@@ -152,37 +162,33 @@ public:
// Modify existing live ranges.
//
- /// extend - Extend the live range of LI to reach Kill.
+ /// Extend the live range of @p LR to reach @p Use.
///
- /// The existing values in LI must be live so they jointly dominate Kill. If
- /// Kill is not dominated by a single existing value, PHI-defs are inserted
- /// as required to preserve SSA form. If Kill is known to be dominated by a
- /// single existing value, Alloc may be null.
+ /// The existing values in @p LR must be live so they jointly dominate @p Use.
+ /// If @p Use is not dominated by a single existing value, PHI-defs are
+ /// inserted as required to preserve SSA form.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0);
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0);
/// createDeadDefs - Create a dead def in LI for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
/// minimal live range.
void createDeadDefs(LiveRange &LR, unsigned Reg);
- /// createDeadDefs - Create a dead def in LI for every def of LI->reg.
- void createDeadDefs(LiveInterval &LI) {
- createDeadDefs(LI, LI.reg);
- }
-
- /// extendToUses - Extend the live range of LI to reach all uses of Reg.
+ /// Extend the live range of @p LR to reach all uses of Reg.
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned Reg);
-
- /// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
- void extendToUses(LiveInterval &LI) {
- extendToUses(LI, LI.reg);
+ void extendToUses(LiveRange &LR, unsigned PhysReg) {
+ extendToUses(LR, PhysReg, ~0u);
}
+ /// Calculates liveness for the register specified in live interval @p LI.
+ /// Creates subregister live ranges as needed if subreg liveness tracking is
+ /// enabled.
+ void calculate(LiveInterval &LI);
+
//===--------------------------------------------------------------------===//
// Low-level interface.
//===--------------------------------------------------------------------===//
@@ -204,7 +210,7 @@ public:
/// addLiveInBlock().
void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
Seen.set(MBB->getNumber());
- LiveOut[MBB] = LiveOutPair(VNI, nullptr);
+ Map[MBB] = LiveOutPair(VNI, nullptr);
}
/// addLiveInBlock - Add a block with an unknown live-in value. This
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index a0fb712..0edc897 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -60,9 +60,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
}
void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
- for (LiveInterval::vni_iterator I = getParent().vni_begin(),
- E = getParent().vni_end(); I != E; ++I) {
- VNInfo *VNI = *I;
+ for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
@@ -258,15 +256,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Check if MI reads any unreserved physregs.
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
- else if (MOI->isDef()) {
- for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo());
- Units.isValid(); ++Units) {
- if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
- if (VNInfo *VNI = LR->getVNInfoAt(Idx))
- LR->removeValNo(VNI);
- }
- }
- }
+ else if (MOI->isDef())
+ LIS.removePhysRegDefAt(Reg, Idx);
continue;
}
LiveInterval &LI = LIS.getInterval(Reg);
@@ -282,13 +273,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Remove defined value.
if (MOI->isDef()) {
- if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
- if (TheDelegate)
- TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
- LI.removeValNo(VNI);
- if (LI.empty())
- RegsToErase.push_back(Reg);
- }
+ if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ LIS.removeVRegDefAt(LI, Idx);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
}
}
@@ -410,7 +399,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
- if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ if (MRI.recomputeRegClass(LI.reg))
DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index a8cae08..154ce6f 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -71,16 +72,44 @@ void LiveRegMatrix::releaseMemory() {
}
}
+template<typename Callable>
+bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
+ unsigned PhysReg, Callable Func) {
+ if (VRegInterval.hasSubRanges()) {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ unsigned Mask = (*Units).second;
+ for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
+ if (S.LaneMask & Mask) {
+ if (Func(Unit, S))
+ return true;
+ break;
+ }
+ }
+ }
+ } else {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (Func(*Units, VRegInterval))
+ return true;
+ }
+ }
+ return false;
+}
+
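The bool returned by the callback is an early-exit signal: the first true stops the walk and becomes foreachUnit's own result, which is how checkRegUnitInterference below reports the first conflict. A hedged usage sketch, assuming the same file-local context as the callers and an invented probe function:

    // Hypothetical probe: is VirtReg live on any of PhysReg's units at Idx?
    static bool isLiveAt(const TargetRegisterInfo *TRI, LiveInterval &VirtReg,
                         unsigned PhysReg, SlotIndex Idx) {
      return foreachUnit(TRI, VirtReg, PhysReg,
                         [&](unsigned, const LiveRange &Range) {
        return Range.liveAt(Idx); // first true stops the walk immediately
      });
    }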
void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
MRI->setPhysRegUsed(PhysReg);
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
- Matrix[*Units].unify(VirtReg);
- }
+
+ foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range);
+ Matrix[Unit].unify(VirtReg, Range);
+ return false;
+ });
+
++NumAssigned;
DEBUG(dbgs() << '\n');
}
@@ -90,10 +119,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
<< " from " << PrintReg(PhysReg, TRI) << ':');
VRM->clearVirt(VirtReg.reg);
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
- Matrix[*Units].extract(VirtReg);
- }
+
+ foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI));
+ Matrix[Unit].extract(VirtReg, Range);
+ return false;
+ });
+
++NumUnassigned;
DEBUG(dbgs() << '\n');
}
@@ -121,12 +154,13 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
if (VirtReg.empty())
return false;
CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- const LiveRange &UnitRange = LIS->getRegUnit(*Units);
- if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes()))
- return true;
- }
- return false;
+
+ bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
+ });
+ return Result;
}
LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 5c5712f..e8bf687 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -291,6 +291,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Debug value, stackmap and patchpoint instructions can't be out of
// range, so they don't need any updates.
if (MI->isDebugValue() ||
+ MI->getOpcode() == TargetOpcode::STATEPOINT ||
MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT)
continue;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 3058b1a..3c73905 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/LeakDetector.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
@@ -45,7 +44,6 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
}
MachineBasicBlock::~MachineBasicBlock() {
- LeakDetector::removeGarbageObject(this);
}
/// getSymbol - Return the MCSymbol for this basic block.
@@ -54,9 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- const TargetMachine &TM = MF->getTarget();
- const char *Prefix =
- TM.getSubtargetImpl()->getDataLayout()->getPrivateGlobalPrefix();
+ const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -87,14 +83,11 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
for (MachineBasicBlock::instr_iterator
I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
-
- LeakDetector::removeGarbageObject(N);
}
void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
N->getParent()->removeFromMBBNumbering(N->Number);
N->Number = -1;
- LeakDetector::addGarbageObject(N);
}
@@ -109,8 +102,6 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// use/def lists.
MachineFunction *MF = Parent->getParent();
N->AddRegOperandsToUseLists(MF->getRegInfo());
-
- LeakDetector::removeGarbageObject(N);
}
/// removeNodeFromList (MI) - When we remove an instruction from a basic block
@@ -124,8 +115,6 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(nullptr);
-
- LeakDetector::addGarbageObject(N);
}
/// transferNodesFromList (MI) - When moving a range of instructions from one
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 08fd200..1b5c1f1 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -347,65 +347,61 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t WeightScale = 0;
uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
continue;
}
- if (*SI != *SuccChain.begin()) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ if (Succ != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob) (CFG conflict)\n");
continue;
}
- // Make sure that a hot successor doesn't have a globally more important
- // predecessor.
- BlockFrequency CandidateEdgeFreq
- = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq =
+ MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
bool BadCFGConflict = false;
- for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
- PE = (*SI)->pred_end();
- PI != PE; ++PI) {
- if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
- BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
continue;
- BlockFrequency PredEdgeFreq
- = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
if (PredEdgeFreq >= CandidateEdgeFreq) {
BadCFGConflict = true;
break;
}
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob) (non-cold CFG conflict)\n");
continue;
}
}
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob)"
<< (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
if (BestSucc && BestWeight >= SuccWeight)
continue;
- BestSucc = *SI;
+ BestSucc = Succ;
BestWeight = SuccWeight;
}
return BestSucc;
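A standalone sketch of the rejection test above, with all numbers invented: a candidate edge of frequency 100 * 0.6 * 0.2 = 12 loses to a predecessor edge of frequency 20 * 0.7 = 14, so the successor is treated as a CFG conflict.

    #include <cstdio>

    int main() {
      double BBFreq = 100.0, SuccProb = 0.6, HotProbCompl = 0.2;
      double CandidateEdgeFreq = BBFreq * SuccProb * HotProbCompl; // 12
      double PredFreq = 20.0, PredEdgeProb = 0.7;
      double PredEdgeFreq = PredFreq * PredEdgeProb;               // 14
      // 14 >= 12, so this successor would be rejected.
      std::printf("bad CFG conflict: %d\n", PredEdgeFreq >= CandidateEdgeFreq);
      return 0;
    }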
@@ -1043,12 +1039,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
- unsigned Align = TLI->getPrefLoopAlignment();
- if (!Align)
- return; // Don't care about loop alignment.
if (FunctionChain.begin() == FunctionChain.end())
return; // Empty chain.
@@ -1066,6 +1058,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!L)
continue;
+ unsigned Align = TLI->getPrefLoopAlignment(L);
+ if (!Align)
+ continue; // Don't care about loop alignment.
+
// If the block is cold relative to the function entry don't waste space
// aligning it.
BlockFrequency Freq = MBFI->getBlockFreq(*BI);
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index ae26967..21b9c5a 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -451,6 +451,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
SmallVector<unsigned, 2> ImplicitDefsToUpdate;
+ SmallVector<unsigned, 2> ImplicitDefs;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -542,6 +543,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// we should make sure it is not dead at CSMI.
if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
ImplicitDefsToUpdate.push_back(i);
+
+ // Keep track of implicit defs of CSMI and MI, to clear possibly
+ // made-redundant kill flags.
+ if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
+ ImplicitDefs.push_back(OldReg);
+
if (OldReg == NewReg) {
--NumDefs;
continue;
@@ -573,8 +580,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
- MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
- MRI->clearKillFlags(CSEPairs[i].second);
+ unsigned OldReg = CSEPairs[i].first;
+ unsigned NewReg = CSEPairs[i].second;
+ // OldReg may have been unused but is used now; clear the Dead flag.
+ MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
+ assert(Def != nullptr && "CSEd register has no unique definition?");
+ Def->clearRegisterDeads(NewReg);
+ // Replace with NewReg and clear kill flags which may be wrong now.
+ MRI->replaceRegWith(OldReg, NewReg);
+ MRI->clearKillFlags(NewReg);
}
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
@@ -582,6 +596,29 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+ // Go through implicit defs of CSMI and MI, and clear the kill flags on
+ // their uses in all the instructions between CSMI and MI.
+ // We might have made some of the kill flags redundant; consider:
+ // subs ... %NZCV<imp-def> <- CSMI
+ // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore
+ // subs ... %NZCV<imp-def> <- MI, to be eliminated
+ // csinc ... %NZCV<imp-use,kill>
+ // Since we eliminated MI, and reused a register imp-def'd by CSMI
+ // (here %NZCV), that register, if it was killed before MI, should have
+ // that kill flag removed, because its lifetime was extended.
+ if (CSMI->getParent() == MI->getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+ for (auto ImplicitDef : ImplicitDefs)
+ if (MachineOperand *MO = II->findRegisterUseOperand(
+ ImplicitDef, /*isKill=*/true, TRI))
+ MO->setIsKill(false);
+ } else {
+ // If the instructions aren't in the same BB, bail out and clear the
+ // kill flag on all uses of the imp-def'd register.
+ for (auto ImplicitDef : ImplicitDefs)
+ MRI->clearKillFlags(ImplicitDef);
+ }
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -606,6 +643,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
}
CSEPairs.clear();
ImplicitDefsToUpdate.clear();
+ ImplicitDefs.clear();
}
return Changed;
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 2931258..41045ac 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -45,7 +45,7 @@ class MachineCombiner : public MachineFunctionPass {
TargetSchedModel TSchedModel;
- /// OptSize - True if optimizing for code size.
+ /// True if optimizing for code size.
bool OptSize;
public:
@@ -109,7 +109,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
return DefInstr;
}
-/// getDepth - Computes depth of instructions in vector \InsInstr.
+/// Computes depth of instructions in vector \p InsInstrs.
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -125,7 +125,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVector<unsigned, 16> InstrDepth;
assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
- // Foreach instruction in in the new sequence compute the depth based on the
+ // For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
@@ -169,8 +169,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
return InstrDepth[NewRootIdx];
}
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
///
/// \param Root is a machine instruction that could be replaced by NewRoot.
/// It is used to compute a more accurate latency information for NewRoot in
@@ -211,12 +210,12 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
return NewRootLatency;
}
-/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// True when the new instruction sequence does not
/// lengthen the critical path. The DAGCombine code sequence ends in MI
/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
/// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
/// The slack is the number of cycles Root can be delayed before the critical
/// path becomes longer.
bool MachineCombiner::preservesCriticalPathLen(
@@ -264,8 +263,7 @@ void MachineCombiner::instr2instrSC(
InstrsSC.push_back(SC);
}
}
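A standalone sketch of the preservesCriticalPathLen inequality quoted above, with invented cycle counts: the new sequence is 5 + 3 = 8 cycles, the old root path is 4 + 3 = 7 cycles plus 2 cycles of slack, so the substitution is allowed.

    #include <cstdio>

    int main() {
      unsigned NewRootDepth = 5, NewRootLatency = 3;
      unsigned RootDepth = 4, RootLatency = 3, RootSlack = 2;
      bool Preserved =
          NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack;
      std::printf("preserves critical path: %d\n", Preserved); // prints 1
      return 0;
    }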
-/// preservesResourceLen - True when the new instructions do not increase
-/// resource length
+/// True when the new instructions do not increase resource length.
bool MachineCombiner::preservesResourceLen(
MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -300,7 +298,7 @@ bool MachineCombiner::preservesResourceLen(
}
/// \returns true when new instruction sequence should be generated
-/// independent if it lenghtens critical path or not
+/// regardless of whether it lengthens the critical path
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
if (OptSize && (NewSize < OldSize))
return true;
@@ -309,7 +307,7 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
-/// combineInstructions - substitute a slow code sequence with a faster one by
+/// Substitute a slow code sequence with a faster one by
/// evaluating an instruction combining pattern.
/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
/// combining based on machine trace metrics. Only combine a sequence of
@@ -406,8 +404,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo &STI =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
TII = STI.getInstrInfo();
TRI = STI.getRegisterInfo();
SchedModel = STI.getSchedModel();
@@ -416,8 +413,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = 0;
- OptSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index 0bee846..acb7c48 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineDominanceFrontier.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 8a2b610..151a260 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -67,17 +67,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
STI->getFrameLowering()->isStackRealignable(),
!F->hasFnAttribute("no-realign-stack"));
- if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(Fn->getAttributes().
- getStackAlignment(AttributeSet::FunctionIndex));
+ if (Fn->hasFnAttribute(Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
ConstantPool = new (Allocator) MachineConstantPool(TM);
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -462,7 +459,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
- const DataLayout *DL = getSubtarget().getDataLayout();
+ const DataLayout *DL = getTarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
@@ -477,7 +474,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
- const DataLayout *DL = getSubtarget().getDataLayout();
+ const DataLayout *DL = getTarget().getDataLayout();
return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Twine(getFunctionNumber())+"$pb");
}
@@ -587,13 +584,20 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
return -++NumFixedObjects;
}
+int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) {
+ // Force the use of a frame pointer. The intention is that this intrinsic be
+ // used in conjunction with unwind mechanisms that leak the frame pointer.
+ setFrameAddressIsTaken(true);
+ Size = RoundUpToAlignment(Size, StackAlignment);
+ return CreateStackObject(Size, StackAlignment, false);
+}
+
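RoundUpToAlignment above is the usual round-up-to-multiple helper from MathExtras. A standalone sketch of the arithmetic with invented sizes:

    #include <cstdio>

    int main() {
      unsigned long long Size = 13, StackAlignment = 16;
      // Equivalent to llvm::RoundUpToAlignment(Size, StackAlignment).
      unsigned long long Rounded =
          (Size + StackAlignment - 1) / StackAlignment * StackAlignment;
      std::printf("%llu rounds up to %llu\n", Size, Rounded); // 13 -> 16
      return 0;
    }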
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
assert(MBB && "MBB must be valid");
const MachineFunction *MF = MBB->getParent();
assert(MF && "MBB must be part of a MachineFunction");
- const TargetMachine &TM = MF->getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
BitVector BV(TRI->getNumRegs());
// Before CSI is calculated, no registers are considered pristine. They can be
@@ -813,7 +817,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
void MachineConstantPoolValue::anchor() { }
const DataLayout *MachineConstantPool::getDataLayout() const {
- return TM.getSubtargetImpl()->getDataLayout();
+ return TM.getDataLayout();
}
Type *MachineConstantPoolEntry::getType() const {
@@ -835,13 +839,13 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
switch (getRelocationInfo()) {
default:
llvm_unreachable("Unknown section kind");
- case 2:
+ case Constant::GlobalRelocations:
Kind = SectionKind::getReadOnlyWithRel();
break;
- case 1:
+ case Constant::LocalRelocation:
Kind = SectionKind::getReadOnlyWithRelLocal();
break;
- case 0:
+ case Constant::NoRelocation:
switch (DL->getTypeAllocSize(getType())) {
case 4:
Kind = SectionKind::getMergeableConst4();
@@ -853,7 +857,7 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
Kind = SectionKind::getMergeableConst16();
break;
default:
- Kind = SectionKind::getMergeableConst();
+ Kind = SectionKind::getReadOnly();
break;
}
}
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 789f204..aaf06a7 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,11 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
@@ -43,15 +50,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
// because CodeGen overloads that to mean preserving the MachineBasicBlock
// CFG in addition to the LLVM IR CFG.
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved("scalar-evolution");
- AU.addPreserved("iv-users");
- AU.addPreserved("memdep");
- AU.addPreserved("live-values");
- AU.addPreserved("domtree");
- AU.addPreserved("domfrontier");
- AU.addPreserved("loops");
- AU.addPreserved("lda");
- AU.addPreserved("stack-protector");
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<IVUsers>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index dee3977..790f5ac 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -52,7 +52,7 @@ char MachineFunctionPrinterPass::ID = 0;
}
char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
-INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
+INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
"Machine Function Printer", false, false)
namespace llvm {
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 7ad0d94..981e4a3 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -397,7 +397,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
break;
case MachineOperand::MO_Metadata:
OS << '<';
- getMetadata()->printAsOperand(OS, /*PrintType=*/false);
+ getMetadata()->printAsOperand(OS);
OS << '>';
break;
case MachineOperand::MO_MCSymbol:
@@ -537,7 +537,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) {
OS << "(tbaa=";
if (TBAAInfo->getNumOperands() > 0)
- TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
+ TBAAInfo->getOperand(0)->printAsOperand(OS);
else
OS << "<unknown>";
OS << ")";
@@ -548,7 +548,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << "(alias.scope=";
if (ScopeInfo->getNumOperands() > 0)
for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
- ScopeInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ ScopeInfo->getOperand(i)->printAsOperand(OS);
if (i != ie-1)
OS << ",";
}
@@ -562,7 +562,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << "(noalias=";
if (NoAliasInfo->getNumOperands() > 0)
for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
- NoAliasInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ NoAliasInfo->getOperand(i)->printAsOperand(OS);
if (i != ie-1)
OS << ",";
}
@@ -595,10 +595,12 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
- const DebugLoc dl, bool NoImp)
- : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0),
- Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) {
+ DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
+ AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
+ debugLoc(std::move(dl)) {
+ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
// Reserve space for the expected number of operands.
if (unsigned NumOps = MCID->getNumOperands() +
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
@@ -617,12 +619,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
Flags(0), AsmPrinterFlags(0),
NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
debugLoc(MI.getDebugLoc()) {
+ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
CapOperands = OperandCapacity::get(MI.getNumOperands());
Operands = MF.allocateOperandArray(CapOperands);
// Copy operands.
- for (unsigned i = 0; i != MI.getNumOperands(); ++i)
- addOperand(MF, MI.getOperand(i));
+ for (const MachineOperand &MO : MI.operands())
+ addOperand(MF, MO);
// Copy all the sensible flags.
setFlags(MI.Flags);
@@ -641,18 +645,18 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Operands[i].isReg())
- MRI.removeRegOperandFromUseList(&Operands[i]);
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.removeRegOperandFromUseList(&MO);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Operands[i].isReg())
- MRI.addRegOperandToUseList(&Operands[i]);
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.addRegOperandToUseList(&MO);
}
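
The loops above now use the operands() range instead of index arithmetic, which reads more directly and avoids repeated getNumOperands() calls. A small sketch of how a raw operand array can expose begin()/end() so range-for works; Instr and Operand here are simplified stand-ins, not the LLVM classes:

    #include <iostream>

    struct Operand {
      bool IsReg;
      unsigned Reg;
    };

    class Instr {
      Operand *Ops;    // externally allocated array, as in MachineInstr
      unsigned NumOps;
    public:
      Instr(Operand *O, unsigned N) : Ops(O), NumOps(N) {}
      Operand *begin() { return Ops; }
      Operand *end() { return Ops + NumOps; }
    };

    int main() {
      Operand Storage[3] = {{true, 1}, {false, 0}, {true, 7}};
      Instr I(Storage, 3);
      for (Operand &MO : I) // same shape as `for (MachineOperand &MO : operands())`
        if (MO.IsReg)
          std::cout << "reg " << MO.Reg << '\n';
    }
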
void MachineInstr::addOperand(const MachineOperand &Op) {
@@ -670,14 +674,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
if (MRI)
return MRI->moveOperands(Dst, Src, NumOps);
- // Here it would be convenient to call memmove, so that isn't allowed because
- // MachineOperand has a constructor and so isn't a POD type.
- if (Dst < Src)
- for (unsigned i = 0; i != NumOps; ++i)
- new (Dst + i) MachineOperand(Src[i]);
- else
- for (unsigned i = NumOps; i ; --i)
- new (Dst + i - 1) MachineOperand(Src[i - 1]);
+ // MachineOperand is a trivially copyable type so we can just use memmove.
+ std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
}
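
The memmove above is only legal because MachineOperand is trivially copyable; otherwise a placement-new copy loop with the copy direction chosen for overlap would still be required. A self-contained sketch of both the guard and the overlap-safe move, written for a generic T rather than the LLVM type:

    #include <cassert>
    #include <cstring>
    #include <type_traits>

    template <typename T>
    void moveRange(T *Dst, T *Src, unsigned N) {
      // memmove (unlike memcpy) is defined for overlapping ranges, but it
      // only preserves object semantics for trivially copyable types.
      static_assert(std::is_trivially_copyable<T>::value,
                    "memmove requires a trivially copyable type");
      std::memmove(Dst, Src, N * sizeof(T));
    }

    int main() {
      int Buf[5] = {1, 2, 3, 4, 5};
      moveRange(Buf, Buf + 1, 4); // overlapping shift-left: {2,3,4,5,5}
      assert(Buf[0] == 2 && Buf[3] == 5);
    }
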
/// addOperand - Add the specified operand to the instruction. If it is an
@@ -922,8 +920,7 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
MachineInstr *MI = (MachineInstr *)this;
MachineRegisterInfo &MRI = MF->getRegInfo();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -1326,8 +1323,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
/// clearKillInfo - Clears kill flags on all operands.
///
void MachineInstr::clearKillInfo() {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (MO.isReg() && MO.isUse())
MO.setIsKill(false);
}
@@ -1340,15 +1336,13 @@ void MachineInstr::substituteRegister(unsigned FromReg,
if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
if (SubIdx)
ToReg = RegInfo.getSubReg(ToReg, SubIdx);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.getReg() != FromReg)
continue;
MO.substPhysReg(ToReg, RegInfo);
}
} else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.getReg() != FromReg)
continue;
MO.substVirtReg(ToReg, SubIdx, RegInfo);
@@ -1491,8 +1485,7 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
- for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
- const MachineOperand &MO = getOperand(i);
+ for (const MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.isUse())
continue;
if (!MO.isDead())
@@ -1823,8 +1816,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
RegInfo = nullptr;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned OpReg = MO.getReg();
@@ -1885,6 +1877,22 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
return true;
}
+void MachineInstr::clearRegisterDeads(unsigned Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+ continue;
+ MO.setIsDead(false);
+ }
+}
+
+void MachineInstr::addRegisterDefReadUndef(unsigned Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
+ continue;
+ MO.setIsUndef();
+ }
+}
+
void MachineInstr::addRegisterDefined(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -1892,8 +1900,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
if (MO)
return;
} else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = getOperand(i);
+ for (const MachineOperand &MO : operands()) {
if (MO.isReg() && MO.getReg() == Reg && MO.isDef() &&
MO.getSubReg() == 0)
return;
@@ -1907,8 +1914,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI) {
bool HasRegMask = false;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (MO.isRegMask()) {
HasRegMask = true;
continue;
@@ -1916,15 +1922,10 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- bool Dead = true;
- for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
- I != E; ++I)
- if (TRI.regsOverlap(*I, Reg)) {
- Dead = false;
- break;
- }
// If there are no uses, including partial uses, the def is dead.
- if (Dead) MO.setIsDead();
+ if (std::none_of(UsedRegs.begin(), UsedRegs.end(),
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ MO.setIsDead();
}
// This is a call with a register mask operand.
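
The rewrite above folds a hand-written flag-and-break scan into std::none_of with a capturing lambda. The same shape in a minimal runnable form, with a stand-in overlap predicate rather than TRI.regsOverlap:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Stand-in predicate: pretend two "registers" overlap iff they are equal.
    static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

    int main() {
      std::vector<unsigned> UsedRegs = {3, 5, 9};
      unsigned Reg = 7;
      // Dead iff no used register overlaps Reg: one expression instead of a
      // mutable flag plus an explicit loop with an early break.
      bool Dead = std::none_of(UsedRegs.begin(), UsedRegs.end(),
                               [&](unsigned Use) { return regsOverlap(Use, Reg); });
      assert(Dead);
    }
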
@@ -1941,8 +1942,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
SmallVector<size_t, 8> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
@@ -1960,7 +1960,8 @@ void MachineInstr::emitError(StringRef Msg) const {
if (getOperand(i-1).isMetadata() &&
(LocMD = getOperand(i-1).getMetadata()) &&
LocMD->getNumOperands() != 0) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
LocCookie = CI->getZExtValue();
break;
}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 2ab0467..64d0932 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -49,6 +49,11 @@ AvoidSpeculation("avoid-speculation",
cl::desc("MachineLICM should avoid speculation"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+HoistCheapInsts("hoist-cheap-insts",
+ cl::desc("MachineLICM should hoist even cheap instructions"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -688,6 +693,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
/// one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
SmallVector<MachineDomTreeNode*, 32> Scopes;
SmallVector<MachineDomTreeNode*, 8> WorkList;
DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
@@ -695,7 +704,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Perform a DFS walk to determine the order of visit.
WorkList.push_back(HeaderN);
- do {
+ while (!WorkList.empty()) {
MachineDomTreeNode *Node = WorkList.pop_back_val();
assert(Node && "Null dominator tree node?");
MachineBasicBlock *BB = Node->getBlock();
@@ -729,28 +738,21 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
ParentMap[Child] = Node;
WorkList.push_back(Child);
}
- } while (!WorkList.empty());
+ }
- if (Scopes.size() != 0) {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
- return;
+ if (Scopes.size() == 0)
+ return;
- // Compute registers which are livein into the loop headers.
- RegSeen.clear();
- BackTrace.clear();
- InitRegPressure(Preheader);
- }
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
// Now perform LICM.
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
MachineDomTreeNode *Node = Scopes[i];
MachineBasicBlock *MBB = Node->getBlock();
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
- continue;
-
EnterScope(MBB);
// Process the block
@@ -1075,7 +1077,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
// Don't hoist cheap instructions if they would increase register pressure,
// even if we're under the limit.
- if (CheapInstr)
+ if (CheapInstr && !HoistCheapInsts)
return true;
for (unsigned i = BackTrace.size(); i != 0; --i) {
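
The new -hoist-cheap-insts flag follows the usual cl::opt pattern: a hidden boolean that defaults off and gates a single decision point, here the early return in CanCauseHighRegPressure. A minimal sketch of defining and reading such a flag; it assumes LLVM headers and libraries are available to compile and link against:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static cl::opt<bool>
    HoistCheapInsts("hoist-cheap-insts",
                    cl::desc("Hoist even cheap instructions"),
                    cl::init(false), cl::Hidden);

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      // Mirrors the gating above: cheap instructions stay put unless the
      // flag flips the default.
      bool CheapInstr = true;
      if (CheapInstr && !HoistCheapInsts)
        outs() << "not hoisting cheap instruction\n";
      else
        outs() << "hoisting\n";
      return 0;
    }
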
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index eb3c0bf..fca7df0 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -273,9 +274,10 @@ bool MachineModuleInfo::doInitialization(Module &M) {
CurCallSite = 0;
CallsEHReturn = 0;
CallsUnwindInit = 0;
- DbgInfoAvailable = UsesVAFloatArgument = false;
+ DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
// Always emit some info, by default "no personality" info.
Personalities.push_back(nullptr);
+ PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
TheModule = nullptr;
@@ -452,6 +454,14 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
LP.TypeIds.push_back(0);
}
+MCSymbol *
+MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *ClauseLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.ClauseLabels.push_back(ClauseLabel);
+ return ClauseLabel;
+}
+
/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
/// pads.
void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
@@ -546,11 +556,17 @@ try_next:;
/// getPersonality - Return the personality function for the current function.
const Function *MachineModuleInfo::getPersonality() const {
- // FIXME: Until PR1414 will be fixed, we're using 1 personality function per
- // function
- return !LandingPads.empty() ? LandingPads[0].Personality : nullptr;
+ for (const LandingPadInfo &LPI : LandingPads)
+ if (LPI.Personality)
+ return LPI.Personality;
+ return nullptr;
}
+EHPersonality MachineModuleInfo::getPersonalityType() {
+ if (PersonalityTypeCache == EHPersonality::Unknown)
+ PersonalityTypeCache = classifyEHPersonality(getPersonality());
+ return PersonalityTypeCache;
+}
/// getPersonalityIndex - Return unique index for current personality
/// function. NULL/first personality function should always get zero index.
unsigned MachineModuleInfo::getPersonalityIndex() const {
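
getPersonalityType computes the classification once and memoizes it behind an Unknown sentinel. A self-contained sketch of that lazy-cache pattern, with a stand-in enum and classifier rather than the LLVM ones:

    #include <cassert>

    enum class EHPersonality { Unknown, GNU_CXX, MSVC };

    // Stand-in for classifyEHPersonality; imagine this call is expensive.
    static EHPersonality classify(int Kind) {
      return Kind == 0 ? EHPersonality::GNU_CXX : EHPersonality::MSVC;
    }

    class ModuleInfo {
      int Kind = 0;
      EHPersonality Cache = EHPersonality::Unknown; // sentinel = "not computed"
    public:
      EHPersonality getPersonalityType() {
        if (Cache == EHPersonality::Unknown)
          Cache = classify(Kind); // computed at most once
        return Cache;
      }
    };

    int main() {
      ModuleInfo MMI;
      assert(MMI.getPersonalityType() == EHPersonality::GNU_CXX);
      assert(MMI.getPersonalityType() == EHPersonality::GNU_CXX); // cache hit
    }
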
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 5a5035e..01d2c2e 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,8 +1,8 @@
#include "llvm/CodeGen/MachineRegionInfo.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#define DEBUG_TYPE "region"
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index e9612f3..32b7db1 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -24,7 +24,8 @@ using namespace llvm;
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
+ : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true),
+ TracksSubRegLiveness(false) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
@@ -60,8 +61,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
}
bool
-MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
+MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
@@ -128,6 +129,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
<< " use list MachineOperand " << MO
<< " has no parent instruction.\n";
Valid = false;
+ continue;
}
MachineOperand *MO0 = &MI->getOperand(0);
unsigned NumOps = MI->getNumOperands();
@@ -391,6 +393,14 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
+unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const
+{
+ // Lane masks are only defined for vregs.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const TargetRegisterClass &TRC = *getRegClass(Reg);
+ return TRC.getLaneMask();
+}
+
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (MachineInstr &I : use_instructions(Reg))
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 261942f..89ac6a8 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -144,12 +144,12 @@ char MachineScheduler::ID = 0;
char &llvm::MachineSchedulerID = MachineScheduler::ID;
-INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "misched",
+INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
MachineScheduler::MachineScheduler()
@@ -336,9 +336,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (skipOptnoneFunction(*mf.getFunction()))
return false;
- const TargetSubtargetInfo &ST =
- mf.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostMachineScheduler()) {
+ if (!mf.getSubtarget().enablePostMachineScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -430,9 +428,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
+ for(;I != MBB->begin(); --I, --RemainingInstrs) {
if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
break;
+ if (!I->isDebugValue())
+ ++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
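
Debug pseudo-instructions must not influence scheduling decisions, so the region size above now counts only real instructions. The same filtering shape in a self-contained sketch, with a stand-in instruction type:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Instr { bool IsDebugValue; };

    int main() {
      std::vector<Instr> Region = {{false}, {true}, {false}, {true}, {false}};
      // Count everything except DBG_VALUE-style markers, so adding or
      // stripping debug info cannot perturb the region-size heuristic.
      unsigned NumRegionInstrs =
          std::count_if(Region.begin(), Region.end(),
                        [](const Instr &I) { return !I.IsDebugValue; });
      assert(NumRegionInstrs == 3);
    }
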
@@ -1432,12 +1432,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// Check if either the dest or source is local. If it's live across a back
// edge, it's not local. Note that if both vregs are live across the back
// edge, we cannot successfully constrain the copy without cyclic scheduling.
- unsigned LocalReg = DstReg;
- unsigned GlobalReg = SrcReg;
+ // If both the copy's source and dest are local live intervals, then we
+ // should treat the dest as the global for the purpose of adding
+ // constraints. This adds edges from source's other uses to the copy.
+ unsigned LocalReg = SrcReg;
+ unsigned GlobalReg = DstReg;
LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
- LocalReg = SrcReg;
- GlobalReg = DstReg;
+ LocalReg = DstReg;
+ GlobalReg = SrcReg;
LocalLI = &LIS->getInterval(LocalReg);
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
return;
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index ba25bca..8337793 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -112,7 +112,7 @@ namespace {
/// for the lifetime of an iteration.
///
/// \return True if the edge is marked as toSplit, false otherwise.
- /// False can be retruned if, for instance, this is not profitable.
+ /// False can be returned if, for instance, this is not profitable.
bool PostponeSplitCriticalEdge(MachineInstr *MI,
MachineBasicBlock *From,
MachineBasicBlock *To,
@@ -504,7 +504,7 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
// If SuccToSinkTo post dominates then it may also be profitable if MI
// can be further profitably sunk into another block in the next round.
bool BreakPHIEdge = false;
- // FIXME - If finding successor is compile time expensive then catch results.
+ // FIXME - If finding successor is compile time expensive then cache results.
if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
@@ -553,19 +553,6 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
return nullptr;
- // FIXME: This picks a successor to sink into based on having one
- // successor that dominates all the uses. However, there are cases where
- // sinking can happen but where the sink point isn't a successor. For
- // example:
- //
- // x = computation
- // if () {} else {}
- // use x
- //
- // the instruction could be sunk over the whole diamond for the
- // if/then/else (or loop, etc), allowing it to be sunk into other blocks
- // after that.
-
// Virtual register defs can only be sunk if all their uses are in blocks
// dominated by one of the successors.
if (SuccToSinkTo) {
@@ -585,6 +572,23 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// higher priority, otherwise prioritize smaller loop depths.
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
MBB->succ_end());
+
+ // Handle cases where sinking can happen but where the sink point isn't a
+ // successor. For example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ const std::vector<MachineDomTreeNode *> &Children =
+ DT->getNode(MBB)->getChildren();
+ for (const auto &DTChild : Children)
+ // DomTree children of MBB that have MBB as immediate dominator are added.
+ if (DTChild->getIDom()->getBlock() == MI->getParent() &&
+ // Skip MBBs already added to the Succs vector above.
+ !MBB->isSuccessor(DTChild->getBlock()))
+ Succs.push_back(DTChild->getBlock());
+
// Sort Successors according to their loop depth or block frequency info.
std::stable_sort(
Succs.begin(), Succs.end(),
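
The sink-candidate extension above augments the CFG successors with dominator-tree children of the block, skipping children already counted as successors. A toy sketch of that collection step, using hand-rolled block and tree structs rather than the LLVM classes:

    #include <cassert>
    #include <set>
    #include <vector>

    struct Block {
      std::set<Block *> Succs; // CFG successors
      bool isSuccessor(Block *B) const { return Succs.count(B) != 0; }
    };

    struct DomNode {
      Block *BB;
      DomNode *IDom;
      std::vector<DomNode *> Children;
    };

    int main() {
      Block A, B, C;
      A.Succs = {&B}; // C is dominated by A but is not a direct successor
      DomNode NA{&A, nullptr, {}}, NB{&B, &NA, {}}, NC{&C, &NA, {}};
      NA.Children = {&NB, &NC};

      std::vector<Block *> Candidates(A.Succs.begin(), A.Succs.end());
      for (DomNode *Child : NA.Children)
        // Add dom-tree children immediately dominated by A that are not
        // already in the successor list.
        if (Child->IDom->BB == &A && !A.isSuccessor(Child->BB))
          Candidates.push_back(Child->BB);

      assert(Candidates.size() == 2); // B from the CFG, C from the dom tree
    }
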
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 2cf87eb..8aacd1f 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -52,12 +52,11 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TII = MF->getSubtarget().getInstrInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
ProcResourceCycles.resize(MF->getNumBlockIDs() *
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 99f0583..bdb094f 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -54,16 +55,13 @@ namespace {
MachineVerifier(Pass *pass, const char *b) :
PASS(pass),
- Banner(b),
- OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ Banner(b)
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
const char *Banner;
- const char *const OutFileName;
- raw_ostream *OS;
const MachineFunction *MF;
const TargetMachine *TM;
const TargetInstrInfo *TII;
@@ -215,9 +213,9 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI);
void report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg);
+ const LiveRange &LR, unsigned Reg, unsigned LaneMask);
void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg);
+ const LiveRange &LR, unsigned Reg, unsigned LaneMask);
void verifyInlineAsm(const MachineInstr *MI);
@@ -230,20 +228,22 @@ namespace {
void verifyLiveVariables();
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
- void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned);
+ void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
+ unsigned);
void verifyLiveRangeSegment(const LiveRange&,
- const LiveRange::const_iterator I, unsigned);
- void verifyLiveRange(const LiveRange&, unsigned);
+ const LiveRange::const_iterator I, unsigned,
+ unsigned);
+ void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0);
void verifyStackFrame();
};
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
- const char *const Banner;
+ const std::string Banner;
- MachineVerifierPass(const char *b = nullptr)
- : MachineFunctionPass(ID), Banner(b) {
+    MachineVerifierPass(const std::string &banner = std::string())
+ : MachineFunctionPass(ID), Banner(banner) {
initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
}
@@ -253,7 +253,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- MF.verify(this, Banner);
+ MF.verify(this, Banner.c_str());
return false;
}
};
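
A std::string default argument deserves care here: std::string's const char* constructor requires a non-null pointer, so defaulting such a parameter to nullptr compiles but is undefined behavior whenever the default is used. An explicit empty string is the safe spelling, as above. A minimal illustration; report is a hypothetical function, not the verifier's:

    #include <iostream>
    #include <string>

    // UB: would bind the reference to std::string(nullptr).
    // void bad(const std::string &Banner = nullptr); // compiles, UB if defaulted

    // Safe: default to an explicit empty string instead.
    void report(const std::string &Banner = std::string()) {
      if (!Banner.empty())
        std::cout << "# " << Banner << '\n';
      std::cout << "verifying\n";
    }

    int main() {
      report();             // fine: empty banner
      report("After ISel"); // fine: the literal converts once at the call site
    }
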
@@ -264,7 +264,7 @@ char MachineVerifierPass::ID = 0;
INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
"Verify generated machine code", false, false)
-FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
return new MachineVerifierPass(Banner);
}
@@ -274,22 +274,6 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
- raw_ostream *OutFile = nullptr;
- if (OutFileName) {
- std::error_code EC;
- OutFile = new raw_fd_ostream(OutFileName, EC,
- sys::fs::F_Append | sys::fs::F_Text);
- if (EC) {
- errs() << "Error opening '" << OutFileName << "': " << EC.message()
- << '\n';
- exit(1);
- }
-
- OS = OutFile;
- } else {
- OS = &errs();
- }
-
foundErrors = 0;
this->MF = &MF;
@@ -324,7 +308,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
- *OS << "Instruction: " << *MBBI;
+ errs() << "Instruction: " << *MBBI;
continue;
}
@@ -360,9 +344,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
}
visitMachineFunctionAfter();
- if (OutFile)
- delete OutFile;
- else if (foundErrors)
+ if (foundErrors)
report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
// Clean up.
@@ -379,70 +361,76 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
- *OS << '\n';
+ errs() << '\n';
if (!foundErrors++) {
if (Banner)
- *OS << "# " << Banner << '\n';
- MF->print(*OS, Indexes);
+ errs() << "# " << Banner << '\n';
+ MF->print(errs(), Indexes);
}
- *OS << "*** Bad machine code: " << msg << " ***\n"
+ errs() << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: BB#" << MBB->getNumber()
+ errs() << "- basic block: BB#" << MBB->getNumber()
<< ' ' << MBB->getName()
<< " (" << (const void*)MBB << ')';
if (Indexes)
- *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ errs() << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
- *OS << '\n';
+ errs() << '\n';
}
void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
- *OS << "- instruction: ";
+ errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(MI))
- *OS << Indexes->getInstructionIndex(MI) << '\t';
- MI->print(*OS, TM);
+ errs() << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(errs(), TM);
}
void MachineVerifier::report(const char *msg,
const MachineOperand *MO, unsigned MONum) {
assert(MO);
report(msg, MO->getParent());
- *OS << "- operand " << MONum << ": ";
- MO->print(*OS, TM);
- *OS << "\n";
+ errs() << "- operand " << MONum << ": ";
+ MO->print(errs(), TM);
+ errs() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
const LiveInterval &LI) {
report(msg, MF);
- *OS << "- interval: " << LI << '\n';
+ errs() << "- interval: " << LI << '\n';
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI) {
report(msg, MBB);
- *OS << "- interval: " << LI << '\n';
+ errs() << "- interval: " << LI << '\n';
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg) {
+ const LiveRange &LR, unsigned Reg,
+ unsigned LaneMask) {
report(msg, MBB);
- *OS << "- liverange: " << LR << '\n';
- *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
+ errs() << "- liverange: " << LR << '\n';
+ errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
+ if (LaneMask != 0)
+ errs() << "- lanemask: " << format("%04X\n", LaneMask);
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg) {
+ const LiveRange &LR, unsigned Reg,
+ unsigned LaneMask) {
report(msg, MF);
- *OS << "- liverange: " << LR << '\n';
- *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
+ errs() << "- liverange: " << LR << '\n';
+ errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
+ if (LaneMask != 0)
+ errs() << "- lanemask: " << format("%04X\n", LaneMask);
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -530,7 +518,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has successor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Preds.count(MBB)) {
report("Inconsistent CFG", MBB);
- *OS << "MBB is not in the predecessor list of the successor BB#"
+ errs() << "MBB is not in the predecessor list of the successor BB#"
<< (*I)->getNumber() << ".\n";
}
}
@@ -542,7 +530,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has predecessor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Succs.count(MBB)) {
report("Inconsistent CFG", MBB);
- *OS << "MBB is not in the successor list of the predecessor BB#"
+ errs() << "MBB is not in the successor list of the predecessor BB#"
<< (*I)->getNumber() << ".\n";
}
}
@@ -592,7 +580,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && Cond.empty()) {
// Block unconditionally branches somewhere.
- if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+    // If the block has exactly one successor, which happens to be a
+ // landingpad, accept it as valid control flow.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size() &&
+ (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 ||
+ *MBB->succ_begin() != *LandingPadSuccs.begin())) {
report("MBB exits via unconditional branch but doesn't have "
"exactly one CFG successor!", MBB);
} else if (!MBB->isSuccessor(TBB)) {
@@ -713,7 +705,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
SlotIndex idx = Indexes->getInstructionIndex(MI);
if (!(idx > lastIndex)) {
report("Instruction index out of order", MI);
- *OS << "Last instruction was at " << lastIndex << '\n';
+ errs() << "Last instruction was at " << lastIndex << '\n';
}
lastIndex = idx;
}
@@ -726,7 +718,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
FirstTerminator = MI;
} else if (FirstTerminator) {
report("Non-terminator instruction after the first terminator", MI);
- *OS << "First terminator was:\t" << *FirstTerminator;
+ errs() << "First terminator was:\t" << *FirstTerminator;
}
}
@@ -778,7 +770,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
if (MI->getNumOperands() < MCID.getNumOperands()) {
report("Too few operands", MI);
- *OS << MCID.getNumOperands() << " operands expected, but "
+ errs() << MCID.getNumOperands() << " operands expected, but "
<< MI->getNumOperands() << " given.\n";
}
@@ -908,7 +900,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
- *OS << TRI->getName(Reg) << " is not a "
+ errs() << TRI->getName(Reg) << " is not a "
<< TRI->getRegClassName(DRC) << " register.\n";
}
}
@@ -920,13 +912,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TRI->getSubClassWithSubReg(RC, SubIdx);
if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
- *OS << "Register class " << TRI->getRegClassName(RC)
+ errs() << "Register class " << TRI->getRegClassName(RC)
<< " does not support subreg index " << SubIdx << "\n";
return;
}
if (RC != SRC) {
report("Invalid register class for subregister index", MO, MONum);
- *OS << "Register class " << TRI->getRegClassName(RC)
+ errs() << "Register class " << TRI->getRegClassName(RC)
<< " does not fully support subreg index " << SubIdx << "\n";
return;
}
@@ -948,7 +940,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
- *OS << "Expected a " << TRI->getRegClassName(DRC)
+ errs() << "Expected a " << TRI->getRegClassName(DRC)
<< " register, but got a " << TRI->getRegClassName(RC)
<< " register\n";
}
@@ -974,11 +966,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
- *OS << "Live stack: " << LI << '\n';
+ errs() << "Live stack: " << LI << '\n';
}
if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
- *OS << "Live stack: " << LI << '\n';
+ errs() << "Live stack: " << LI << '\n';
}
}
break;
@@ -1017,12 +1009,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveQueryResult LRQ = LR->Query(UseIdx);
if (!LRQ.valueIn()) {
report("No live segment at use", MO, MONum);
- *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
<< ' ' << *LR << '\n';
}
if (MO->isKill() && !LRQ.isKill()) {
report("Live range continues after kill flag", MO, MONum);
- *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
+ errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
}
}
}
@@ -1035,13 +1027,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveQueryResult LRQ = LI.Query(UseIdx);
if (!LRQ.valueIn()) {
report("No live segment at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ errs() << UseIdx << " is not live in " << LI << '\n';
}
// Check for extra kill flags.
// Note that we allow missing kill flags for now.
if (MO->isKill() && !LRQ.isKill()) {
report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ errs() << "Live range: " << LI << '\n';
}
} else {
report("Virtual register has no live interval", MO, MONum);
@@ -1053,7 +1045,37 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!regsLive.count(Reg)) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Reserved registers may be used even when 'dead'.
- if (!isReserved(Reg))
+ bool Bad = !isReserved(Reg);
+ // We are fine if just any subregister has a defined value.
+ if (Bad) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid();
+ ++SubRegs) {
+ if (regsLive.count(*SubRegs)) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+ // If there is an additional implicit-use of a super register we stop
+ // here. By definition we are fine if the super register is not
+      // (completely) dead; if the complete super register is dead we will
+ // get a report for its operand.
+ if (Bad) {
+ for (const MachineOperand &MOP : MI->uses()) {
+ if (!MOP.isReg())
+ continue;
+ if (!MOP.isImplicit())
+ continue;
+ for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
+ ++SubRegs) {
+ if (*SubRegs == Reg) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+ }
+ if (Bad)
report("Using an undefined physical register", MO, MONum);
} else if (MRI->def_empty(Reg)) {
report("Reading virtual register without a def", MO, MONum);
@@ -1094,19 +1116,19 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << VNI->id << " is not defined at "
+ errs() << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
}
} else {
report("No live segment at def", MO, MONum);
- *OS << DefIdx << " is not live in " << LI << '\n';
+ errs() << DefIdx << " is not live in " << LI << '\n';
}
// Check that, if the dead def flag is present, LiveInts agree.
if (MO->isDead()) {
LiveQueryResult LRQ = LI.Query(DefIdx);
if (!LRQ.isDeadDef()) {
report("Live range continues after dead def flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ errs() << "Live range: " << LI << '\n';
}
}
} else {
@@ -1148,7 +1170,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
SlotIndex stop = Indexes->getMBBEndIdx(MBB);
if (!(stop > lastIndex)) {
report("Block ends before last instruction index", MBB);
- *OS << "Block ends at " << stop
+ errs() << "Block ends at " << stop
<< " last instruction was at " << lastIndex << '\n';
}
lastIndex = stop;
@@ -1250,7 +1272,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
if (!seen.count(*PrI)) {
report("Missing PHI operand", &BBI);
- *OS << "BB#" << (*PrI)->getNumber()
+ errs() << "BB#" << (*PrI)->getNumber()
<< " is a predecessor according to the CFG.\n";
}
}
@@ -1281,7 +1303,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
++I)
if (MInfo.regsKilled.count(*I)) {
report("Virtual register killed in block, but needed live out.", &MBB);
- *OS << "Virtual register " << PrintReg(*I)
+ errs() << "Virtual register " << PrintReg(*I)
<< " is used after the block.\n";
}
}
@@ -1313,13 +1335,13 @@ void MachineVerifier::verifyLiveVariables() {
if (MInfo.vregsRequired.count(Reg)) {
if (!VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block missing from AliveBlocks", &MBB);
- *OS << "Virtual register " << PrintReg(Reg)
+ errs() << "Virtual register " << PrintReg(Reg)
<< " must be live through the block.\n";
}
} else {
if (VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block should not be in AliveBlocks", &MBB);
- *OS << "Virtual register " << PrintReg(Reg)
+ errs() << "Virtual register " << PrintReg(Reg)
<< " is not needed live through the block.\n";
}
}
@@ -1338,7 +1360,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!LiveInts->hasInterval(Reg)) {
report("Missing live interval for virtual register", MF);
- *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ errs() << PrintReg(Reg, TRI) << " still has defs or uses\n";
continue;
}
@@ -1354,38 +1376,40 @@ void MachineVerifier::verifyLiveIntervals() {
}
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
- const VNInfo *VNI,
- unsigned Reg) {
+ const VNInfo *VNI, unsigned Reg,
+ unsigned LaneMask) {
if (VNI->isUnused())
return;
const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LR, Reg);
- *OS << "Valno #" << VNI->id << '\n';
+ report("Valno not live at def and not marked unused", MF, LR, Reg,
+ LaneMask);
+ errs() << "Valno #" << VNI->id << '\n';
return;
}
if (DefVNI != VNI) {
- report("Live segment at def has different valno", MF, LR, Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ report("Live segment at def has different valno", MF, LR, Reg, LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " where valno #" << DefVNI->id << " is live\n";
return;
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LR, Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ report("Invalid definition index", MF, LR, Reg, LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LR << '\n';
return;
}
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LR, Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ report("PHIDef value is not defined at MBB start", MBB, LR, Reg,
+ LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< ", not at the beginning of BB#" << MBB->getNumber() << '\n';
}
return;
@@ -1394,8 +1418,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LR, Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("No instruction at def index", MBB, LR, Reg, LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
return;
}
@@ -1413,6 +1437,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
!TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
+ if (LaneMask != 0 &&
+ (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0)
+ continue;
hasDef = true;
if (MOI->isEarlyClobber())
isEarlyClobber = true;
@@ -1420,7 +1447,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!hasDef) {
report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LR << '\n';
+ errs() << "Valno #" << VNI->id << " in " << LR << '\n';
}
// Early clobber defs begin at USE slots, but other defs must begin at
@@ -1428,51 +1455,52 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (isEarlyClobber) {
if (!VNI->def.isEarlyClobber()) {
report("Early clobber def must be at an early-clobber slot", MBB, LR,
- Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ Reg, LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
} else if (!VNI->def.isRegister()) {
report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LR, Reg);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ MBB, LR, Reg, LaneMask);
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
}
}
void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const LiveRange::const_iterator I,
- unsigned Reg) {
+ unsigned Reg, unsigned LaneMask) {
const LiveRange::Segment &S = *I;
const VNInfo *VNI = S.valno;
assert(VNI && "Live segment has no valno");
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
- report("Foreign valno in live segment", MF, LR, Reg);
- *OS << S << " has a bad valno\n";
+ report("Foreign valno in live segment", MF, LR, Reg, LaneMask);
+ errs() << S << " has a bad valno\n";
}
if (VNI->isUnused()) {
- report("Live segment valno is marked unused", MF, LR, Reg);
- *OS << S << '\n';
+ report("Live segment valno is marked unused", MF, LR, Reg, LaneMask);
+ errs() << S << '\n';
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LR, Reg);
- *OS << S << '\n';
+ report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask);
+ errs() << S << '\n';
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg);
- *OS << S << '\n';
+ report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg,
+ LaneMask);
+ errs() << S << '\n';
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LR, Reg);
- *OS << S << '\n';
+ report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask);
+ errs() << S << '\n';
return;
}
@@ -1489,15 +1517,17 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const MachineInstr *MI =
LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg);
- *OS << S << '\n';
+ report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg,
+ LaneMask);
+ errs() << S << '\n';
return;
}
// The block slot must refer to a basic block boundary.
if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg);
- *OS << S << '\n';
+ report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg,
+ LaneMask);
+ errs() << S << '\n';
}
if (S.end.isDead()) {
@@ -1505,8 +1535,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// That means there must be a dead def.
if (!SlotIndex::isSameInstr(S.start, S.end)) {
report("Live segment ending at dead slot spans instructions", EndMBB, LR,
- Reg);
- *OS << S << '\n';
+ Reg, LaneMask);
+ errs() << S << '\n';
}
}
@@ -1515,8 +1545,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LR, Reg);
- *OS << S << '\n';
+ "redefined by an EC def in the same instruction", EndMBB, LR, Reg,
+ LaneMask);
+ errs() << S << '\n';
}
}
@@ -1526,16 +1557,27 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// A live segment can end with either a redefinition, a kill flag on a
// use, or a dead flag on a def.
bool hasRead = false;
+ bool hasSubRegDef = false;
for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
+ if (LaneMask != 0 &&
+ (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
+ continue;
+ if (MOI->isDef() && MOI->getSubReg() != 0)
+ hasSubRegDef = true;
if (MOI->readsReg())
hasRead = true;
}
if (!S.end.isDead()) {
if (!hasRead) {
- report("Instruction ending live segment doesn't read the register", MI);
- *OS << S << " in " << LR << '\n';
+ // When tracking subregister liveness, the main range must start new
+ // values on partial register writes, even if there is no read.
+ if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) {
+ report("Instruction ending live segment doesn't read the register",
+ MI);
+ errs() << S << " in " << LR << '\n';
+ }
}
}
}
@@ -1573,8 +1615,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LR, Reg);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ report("Register not marked live out of predecessor", *PI, LR, Reg,
+ LaneMask);
+ errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << '\n';
continue;
@@ -1582,8 +1625,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LR, Reg);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
+ report("Different value live out of predecessor", *PI, LR, Reg,
+ LaneMask);
+ errs() << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
@@ -1595,18 +1639,36 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
-void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) {
- for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end();
- I != E; ++I)
- verifyLiveRangeValue(LR, *I, Reg);
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
+ unsigned LaneMask) {
+ for (const VNInfo *VNI : LR.valnos)
+ verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I)
- verifyLiveRangeSegment(LR, I, Reg);
+ verifyLiveRangeSegment(LR, I, Reg, LaneMask);
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
verifyLiveRange(LI, LI.reg);
+ unsigned Reg = LI.reg;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ unsigned Mask = 0;
+ unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask) != 0)
+ report("Lane masks of sub ranges overlap in live interval", MF, LI);
+ if ((SR.LaneMask & ~MaxMask) != 0)
+ report("Subrange lanemask is invalid", MF, LI);
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ if (!LI.covers(SR))
+ report("A Subrange is not covered by the main range", MF, LI);
+ }
+ } else if (LI.hasSubRanges()) {
+ report("subregister liveness only allowed for virtual registers", MF, LI);
+ }
+
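
The subrange checks added above are pure bit math over lane masks: no two subranges may claim the same lane, and no subrange may claim lanes outside the register's maximum mask. A standalone sketch of the two invariants, using plain unsigned masks as the verifier does:

    #include <cassert>

    int main() {
      unsigned MaxMask = 0x000F;               // the register has 4 lanes
      unsigned SubRanges[] = {0x0003, 0x000C}; // lanes {0,1} and {2,3}

      unsigned Seen = 0;
      for (unsigned LaneMask : SubRanges) {
        assert((Seen & LaneMask) == 0 &&       // no two subranges share a lane
               "Lane masks of sub ranges overlap");
        assert((LaneMask & ~MaxMask) == 0 &&   // only lanes the class actually has
               "Subrange lanemask is invalid");
        Seen |= LaneMask;
      }
      assert(Seen == MaxMask); // here the subranges happen to cover everything
    }
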
// Check the LI only has one connected component.
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
ConnectedVNInfoEqClasses ConEQ(*LiveInts);
@@ -1614,12 +1676,12 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
if (NumComp > 1) {
report("Multiple connected components in live interval", MF, LI);
for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
+ errs() << comp << ": valnos";
for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
E = LI.vni_end(); I!=E; ++I)
if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
+ errs() << ' ' << (*I)->id;
+ errs() << '\n';
}
}
}
@@ -1700,7 +1762,7 @@ void MachineVerifier::verifyStackFrame() {
BBState.ExitValue;
if (BBState.ExitIsSetup && AbsSPAdj != Size) {
report("FrameDestroy <n> is after FrameSetup <m>", &I);
- *OS << "FrameDestroy <" << Size << "> is after FrameSetup <"
+ errs() << "FrameDestroy <" << Size << "> is after FrameSetup <"
<< AbsSPAdj << ">.\n";
}
BBState.ExitValue += Size;
@@ -1717,7 +1779,7 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
report("The exit stack state of a predecessor is inconsistent.", MBB);
- *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
+ errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
<< SPState[(*I)->getNumber()].ExitValue << ", "
<< SPState[(*I)->getNumber()].ExitIsSetup
<< "), while BB#" << MBB->getNumber() << " has entry state ("
@@ -1733,7 +1795,7 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
report("The entry stack state of a successor is inconsistent.", MBB);
- *OS << "Successor BB#" << (*I)->getNumber() << " has entry state ("
+ errs() << "Successor BB#" << (*I)->getNumber() << " has entry state ("
<< SPState[(*I)->getNumber()].EntryValue << ", "
<< SPState[(*I)->getNumber()].EntryIsSetup
<< "), while BB#" << MBB->getNumber() << " has exit state ("
diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp
index 48db200..17654a6 100644
--- a/lib/CodeGen/OcamlGC.cpp
+++ b/lib/CodeGen/OcamlGC.cpp
@@ -20,16 +20,15 @@
using namespace llvm;
namespace {
- class OcamlGC : public GCStrategy {
- public:
- OcamlGC();
- };
+class OcamlGC : public GCStrategy {
+public:
+ OcamlGC();
+};
}
-static GCRegistry::Add<OcamlGC>
-X("ocaml", "ocaml 3.10-compatible GC");
+static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC");
-void llvm::linkOcamlGC() { }
+void llvm::linkOcamlGC() {}
OcamlGC::OcamlGC() {
NeededSafePoints = 1 << GC::PostCall;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index ec71d86..272d068 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -14,13 +14,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -82,7 +81,9 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr));
+ cl::init(false),
+ cl::ZeroOrMore);
+
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
@@ -235,8 +236,8 @@ TargetPassConfig::~TargetPassConfig() {
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
- Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false),
+ Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
+ Impl(nullptr), Initialized(false), DisableVerify(false),
EnableTailMerge(true) {
Impl = new PassConfigImpl();
@@ -250,7 +251,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
// Temporarily disable experimental passes.
- const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = *TM->getSubtargetImpl();
if (!ST.useMachineScheduler())
disablePass(&MachineSchedulerID);
}
@@ -304,7 +305,7 @@ IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P) {
+void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -313,10 +314,21 @@ void TargetPassConfig::addPass(Pass *P) {
// and shouldn't reference it.
AnalysisID PassID = P->getPassID();
- if (Started && !Stopped)
+ if (Started && !Stopped) {
+ std::string Banner;
+ // Construct banner message before PM->add() as that may delete the pass.
+ if (AddingMachinePasses && (printAfter || verifyAfter))
+ Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
- else
+ if (AddingMachinePasses) {
+ if (printAfter)
+ addPrintPass(Banner);
+ if (verifyAfter)
+ addVerifyPass(Banner);
+ }
+ } else {
delete P;
+ }
if (StopAfter == PassID)
Stopped = true;
if (StartAfter == PassID)
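
addPass builds the banner string before handing the pass to PM->add() because the pass manager takes ownership and may delete the pass immediately, after which P->getPassName() would be a use-after-free. A self-contained sketch of this capture-before-transfer rule, with a toy manager standing in for the LLVM PassManager:

    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    struct Pass {
      std::string Name;
      const std::string &getPassName() const { return Name; }
    };

    struct Manager {
      std::vector<std::unique_ptr<Pass>> Passes;
      // May delete P instead of keeping it (here: drops duplicates).
      void add(Pass *P) {
        for (auto &Existing : Passes)
          if (Existing->Name == P->Name) { delete P; return; }
        Passes.emplace_back(P);
      }
    };

    int main() {
      Manager PM;
      Pass *P = new Pass{"machine-scheduler"};
      // Capture everything needed from P *before* add(); afterwards P may
      // be a dangling pointer.
      std::string Banner = "After " + P->getPassName();
      PM.add(P);
      assert(Banner == "After machine-scheduler");
    }
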
@@ -330,7 +342,8 @@ void TargetPassConfig::addPass(Pass *P) {
///
/// addPass cannot return a pointer to the pass instance because it is internal
/// to the PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
+ bool printAfter) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -345,7 +358,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P); // Ends the lifetime of P.
+ addPass(P, verifyAfter, printAfter); // Ends the lifetime of P.
// Add the passes after the pass P if there is any.
for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
@@ -360,18 +373,25 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
NP = Pass::createPass((*I).second.getID());
assert(NP && "Pass ID not registered");
}
- addPass(NP);
+ addPass(NP, false, false);
}
}
return FinalID;
}
-void TargetPassConfig::printAndVerify(const char *Banner) {
+void TargetPassConfig::printAndVerify(const std::string &Banner) {
+ addPrintPass(Banner);
+ addVerifyPass(Banner);
+}
+
+void TargetPassConfig::addPrintPass(const std::string &Banner) {
if (TM->shouldPrintMachineCode())
- addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
+ PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+void TargetPassConfig::addVerifyPass(const std::string &Banner) {
if (VerifyMachineCode)
- addPass(createMachineVerifierPass(Banner));
+ PM->add(createMachineVerifierPass(Banner));
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -401,7 +421,10 @@ void TargetPassConfig::addIRPasses() {
addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
addPass(createGCLoweringPass());
+ addPass(createShadowStackGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -429,9 +452,11 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::ItaniumWinEH:
addPass(createDwarfEHPass(TM));
break;
+ case ExceptionHandling::WinEH:
+ addPass(createWinEHPass(TM));
+ break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -491,6 +516,8 @@ void TargetPassConfig::addISelPrepare() {
/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
/// before/after any target-independent pass. But it's currently overkill.
void TargetPassConfig::addMachinePasses() {
+ AddingMachinePasses = true;
+
// Insert a machine instr printer pass after the specified pass.
// If -print-machineinstrs specified, print machineinstrs after all passes.
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
@@ -499,7 +526,7 @@ void TargetPassConfig::addMachinePasses() {
.equals("option-unspecified")) {
const PassRegistry *PR = PassRegistry::getPassRegistry();
const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
- const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+ const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
assert (TPI && IPI && "Pass ID not registered!");
const char *TID = (const char *)(TPI->getTypeInfo());
const char *IID = (const char *)(IPI->getTypeInfo());
@@ -510,8 +537,7 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- if (addPass(&ExpandISelPseudosID))
- printAndVerify("After ExpandISelPseudos");
+ addPass(&ExpandISelPseudosID);
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -519,12 +545,11 @@ void TargetPassConfig::addMachinePasses() {
} else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(&LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID, false);
}
// Run pre-ra passes.
- if (addPreRegAlloc())
- printAndVerify("After PreRegAlloc passes");
+ addPreRegAlloc();
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
@@ -534,12 +559,10 @@ void TargetPassConfig::addMachinePasses() {
addFastRegAlloc(createRegAllocPass(false));
// Run post-ra passes.
- if (addPostRegAlloc())
- printAndVerify("After PostRegAlloc passes");
+ addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
addPass(&PrologEpilogCodeInserterID);
- printAndVerify("After PrologEpilogCodeInserter");
/// Add passes that optimize machine instructions after register allocation.
if (getOptLevel() != CodeGenOpt::None)
@@ -547,11 +570,9 @@ void TargetPassConfig::addMachinePasses() {
// Expand pseudo instructions before second scheduling pass.
addPass(&ExpandPostRAPseudosID);
- printAndVerify("After ExpandPostRAPseudos");
// Run pre-sched2 passes.
- if (addPreSched2())
- printAndVerify("After PreSched2 passes");
+ addPreSched2();
// Second pass scheduler.
if (getOptLevel() != CodeGenOpt::None) {
@@ -559,66 +580,61 @@ void TargetPassConfig::addMachinePasses() {
addPass(&PostMachineSchedulerID);
else
addPass(&PostRASchedulerID);
- printAndVerify("After PostRAScheduler");
}
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()));
+ addPass(createGCInfoPrinter(dbgs()), false, false);
}
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
addBlockPlacement();
- if (addPreEmitPass())
- printAndVerify("After PreEmit passes");
+ addPreEmitPass();
- addPass(&StackMapLivenessID);
+ addPass(&StackMapLivenessID, false);
+
+ AddingMachinePasses = false;
}
/// Add passes that optimize machine instructions in SSA form.
void TargetPassConfig::addMachineSSAOptimization() {
// Pre-ra tail duplication.
- if (addPass(&EarlyTailDuplicateID))
- printAndVerify("After Pre-RegAlloc TailDuplicate");
+ addPass(&EarlyTailDuplicateID);
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
- addPass(&OptimizePHIsID);
+ addPass(&OptimizePHIsID, false);
// This pass merges large allocas. StackSlotColoring is a different pass
// which merges spill slots.
- addPass(&StackColoringID);
+ addPass(&StackColoringID, false);
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(&LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID, false);
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
addPass(&DeadMachineInstructionElimID);
- printAndVerify("After codegen DCE pass");
// Allow targets to insert passes that improve instruction level parallelism,
// like if-conversion. Such passes will typically need dominator trees and
// loop info, just like LICM and CSE below.
- if (addILPOpts())
- printAndVerify("After ILP optimizations");
+ addILPOpts();
- addPass(&MachineLICMID);
- addPass(&MachineCSEID);
+ addPass(&MachineLICMID, false);
+ addPass(&MachineCSEID, false);
addPass(&MachineSinkingID);
- printAndVerify("After Machine LICM, CSE and Sinking passes");
- addPass(&PeepholeOptimizerID);
+ addPass(&PeepholeOptimizerID, false);
// Clean-up the dead code that may have been generated by peephole
// rewriting.
addPass(&DeadMachineInstructionElimID);
- printAndVerify("After codegen peephole optimization pass");
}
//===---------------------------------------------------------------------===//
@@ -701,18 +717,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- addPass(&PHIEliminationID);
- addPass(&TwoAddressInstructionPassID);
+ addPass(&PHIEliminationID, false);
+ addPass(&TwoAddressInstructionPassID, false);
addPass(RegAllocPass);
- printAndVerify("After Register Allocation");
}
/// Add standard target-independent passes that are tightly coupled with
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- addPass(&ProcessImplicitDefsID);
+ addPass(&ProcessImplicitDefsID, false);
// LiveVariables currently requires pure SSA form.
//
@@ -720,35 +735,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// LiveVariables can be removed completely, and LiveIntervals can be directly
// computed. (We still either need to regenerate kill flags after regalloc, or
// preferably fix the scavenger to not depend on them).
- addPass(&LiveVariablesID);
+ addPass(&LiveVariablesID, false);
// Edge splitting is smarter with machine loop info.
- addPass(&MachineLoopInfoID);
- addPass(&PHIEliminationID);
+ addPass(&MachineLoopInfoID, false);
+ addPass(&PHIEliminationID, false);
// Eventually, we want to run LiveIntervals before PHI elimination.
if (EarlyLiveIntervals)
- addPass(&LiveIntervalsID);
+ addPass(&LiveIntervalsID, false);
- addPass(&TwoAddressInstructionPassID);
+ addPass(&TwoAddressInstructionPassID, false);
addPass(&RegisterCoalescerID);
- printAndVerify("After Register Coalescing");
// PreRA instruction scheduling.
- if (addPass(&MachineSchedulerID))
- printAndVerify("After Machine Scheduling");
+ addPass(&MachineSchedulerID);
// Add the selected register allocation pass.
addPass(RegAllocPass);
- printAndVerify("After Register Allocation, before rewriter");
// Allow targets to change the register assignments before rewriting.
- if (addPreRewrite())
- printAndVerify("After pre-rewrite passes");
+ addPreRewrite();
// Finally rewrite virtual registers.
addPass(&VirtRegRewriterID);
- printAndVerify("After Virtual Register Rewriter");
// Perform stack slot coloring and post-ra machine LICM.
//
@@ -760,8 +770,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
//
// FIXME: can this move into MachineLateOptimization?
addPass(&PostRAMachineLICMID);
-
- printAndVerify("After StackSlotColoring and postra Machine LICM");
}
//===---------------------------------------------------------------------===//
@@ -771,34 +779,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Branch folding must be run after regalloc and prolog/epilog insertion.
- if (addPass(&BranchFolderPassID))
- printAndVerify("After BranchFolding");
+ addPass(&BranchFolderPassID);
// Tail duplication.
  // Note that tail duplication just increases code size and degrades
  // performance for targets that require structured control flow.
  // In addition it can also make the CFG irreducible, so we disable it.
- if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID))
- printAndVerify("After TailDuplicate");
+ if (!TM->requiresStructuredCFG())
+ addPass(&TailDuplicateID);
// Copy propagation.
- if (addPass(&MachineCopyPropagationID))
- printAndVerify("After copy propagation pass");
+ addPass(&MachineCopyPropagationID);
}
/// Add standard GC passes.
bool TargetPassConfig::addGCPasses() {
- addPass(&GCMachineCodeAnalysisID);
+ addPass(&GCMachineCodeAnalysisID, false);
return true;
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- if (addPass(&MachineBlockPlacementID)) {
+ if (addPass(&MachineBlockPlacementID, false)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
addPass(&MachineBlockPlacementStatsID);
-
- printAndVerify("After machine block placement.");
}
}
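
The TargetPassConfig hunks above fold the old "if (addPass(X)) printAndVerify(...)" idiom into addPass itself via the new verifyAfter/printAfter flags. A minimal sketch of the assumed mechanics, with hypothetical stand-in types (ConfigSketch, PassManagerSketch, DummyPass are not LLVM API):

#include <cstdio>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Pass {
  virtual ~Pass() = default;
  virtual std::string name() const = 0;
};

struct PassManagerSketch {
  std::vector<std::unique_ptr<Pass>> Passes;
  void add(std::unique_ptr<Pass> P) { Passes.push_back(std::move(P)); }
};

struct ConfigSketch {
  PassManagerSketch PM;
  bool ShouldPrint = true, ShouldVerify = true;

  void addPrintPass(const std::string &Banner) {
    if (ShouldPrint) std::printf("print: %s\n", Banner.c_str());
  }
  void addVerifyPass(const std::string &Banner) {
    if (ShouldVerify) std::printf("verify: %s\n", Banner.c_str());
  }
  // One overload replaces the old "add, then maybe printAndVerify" dance;
  // callers that never want the instrumentation pass false explicitly.
  void addPass(std::unique_ptr<Pass> P, bool verifyAfter = true,
               bool printAfter = true) {
    std::string Banner = "After " + P->name();
    PM.add(std::move(P));
    if (printAfter) addPrintPass(Banner);
    if (verifyAfter) addVerifyPass(Banner);
  }
};

struct DummyPass : Pass {
  std::string name() const override { return "DummyPass"; }
};

int main() {
  ConfigSketch C;
  C.addPass(std::make_unique<DummyPass>());               // instrumented
  C.addPass(std::make_unique<DummyPass>(), false, false); // bare
}

This matches why so many call sites in the diff gain an explicit false argument: the default is now to print/verify after every pass, and opting out happens per call.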
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a296aea..283d1f2 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -133,7 +133,8 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSetImpl<MachineInstr*> &LocalMIs);
- bool optimizeSelect(MachineInstr *MI);
+ bool optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeCondBranch(MachineInstr *MI);
bool optimizeCopyOrBitcast(MachineInstr *MI);
bool optimizeCoalescableCopy(MachineInstr *MI);
@@ -482,7 +483,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
}
/// Optimize a select instruction.
-bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
unsigned TrueOp = 0;
unsigned FalseOp = 0;
bool Optimizable = false;
@@ -491,7 +493,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
return false;
if (!Optimizable)
return false;
- if (!TII->optimizeSelect(MI))
+ if (!TII->optimizeSelect(MI, LocalMIs))
return false;
MI->eraseFromParent();
++NumSelects;
@@ -1072,6 +1074,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
bool SeenMoveImm = false;
+
+  // During this forward scan, the pass at some point needs to answer the
+  // question "given a pointer to an MI in the current BB, is it located
+  // before or after the current instruction?".
+  // To answer this, the following set keeps track of the MIs already seen
+  // during the scan; if an MI is not in the set, it is assumed to be located
+  // after. Newly created MIs have to be inserted into the set as well.
SmallPtrSet<MachineInstr*, 16> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
@@ -1102,7 +1111,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
- (MI->isSelect() && optimizeSelect(MI))) {
+ (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
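
The LocalMIs comment above relies on a standard trick: during a single forward walk, membership in an "already seen" set answers "is this instruction before the current one?" in O(1). A toy sketch of the idea, with ints standing in for MachineInstr pointers:

#include <cstdio>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> Block = {10, 20, 30, 40}; // instructions in program order
  std::unordered_set<int> Seen;              // plays the role of LocalMIs
  for (int MI : Block) {
    Seen.insert(MI);
    // Anything in Seen is at or before MI; anything else is assumed after.
    bool Before = Seen.count(20) != 0;
    std::printf("at %d: 20 located before-or-here: %s\n", MI,
                Before ? "yes" : "no");
  }
  return 0;
}

As the comment warns, the invariant only holds if newly created instructions are inserted into the set by whoever creates them.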
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 89e1d11..ad59fc9 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -282,9 +282,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST =
- Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+ if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
AntiDepMode, CriticalPathRCs))
return false;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 06530b9..6d29b98 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -495,7 +495,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
+ Offset = RoundUpToAlignment(Offset, Align);
MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
@@ -504,7 +504,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
+ Offset = RoundUpToAlignment(Offset, Align);
MFI->setObjectOffset(i, Offset);
Offset += MFI->getObjectSize(i);
@@ -537,7 +537,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = RoundUpToAlignment(Offset, Align);
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -656,8 +656,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ Offset = RoundUpToAlignment(Offset, StackAlign);
}
// Update frame info to pretend that this is part of the stack...
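
The three alignment hunks above replace the open-coded rounding expression with RoundUpToAlignment. A small self-check of the assumed equivalence; roundUpToAlignment below is a local reimplementation for illustration, not the LLVM header function:

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Align (Align > 0).
static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  // Matches the open-coded form the patch removes.
  assert(roundUpToAlignment(13, 8) == 16);
  assert(roundUpToAlignment(16, 8) == 16);
  // For a power-of-two Align this also equals the removed mask form.
  uint64_t Offset = 13, AlignMask = 8 - 1;
  assert(((Offset + AlignMask) & ~uint64_t(AlignMask)) ==
         roundUpToAlignment(Offset, 8));
  return 0;
}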
@@ -703,7 +702,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(Fn)) return;
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
@@ -743,26 +743,19 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- bool StackGrowsDown =
- TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+ bool InsideCallSequence = false;
+
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
if (I->getOpcode() == FrameSetupOpcode ||
I->getOpcode() == FrameDestroyOpcode) {
- // Remember how much SP has been adjusted to create the call
- // frame.
- int Size = I->getOperand(0).getImm();
-
- if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
- (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
- Size = -Size;
-
- SPAdj += Size;
+ InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ SPAdj += TII.getSPAdjust(I);
MachineBasicBlock::iterator PrevI = BB->end();
if (I != BB->begin()) PrevI = std::prev(I);
@@ -797,6 +790,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
continue;
}
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI->isDebugValue() || i == 0) &&
+ "Frame indicies can only appear as the first operand of a "
+ "DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI->getOperand(i + 1);
+ const unsigned refOffset =
+ TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
+ Reg);
+
+ Offset.setImm(Offset.getImm() + refOffset);
+ MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // Frame allocations are target independent. Simply swap the index with
+ // the offset.
+ if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) {
+ assert(TFI->hasFP(Fn) && "frame alloc requires FP");
+ MachineOperand &FI = MI->getOperand(i);
+ unsigned Reg;
+ int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg);
+ FI.ChangeToImmediate(FrameOffset);
+ continue;
+ }
+
// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
// to insert more than one instruction. We need the register
@@ -823,6 +847,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
break;
}
+ // If we are looking at a call sequence, we need to keep track of
+ // the SP adjustment made by each instruction in the sequence.
+ // This includes both the frame setup/destroy pseudos (handled above),
+ // as well as other instructions that have side effects w.r.t the SP.
+ // Note that this must come after eliminateFrameIndex, because
+ // if I itself referred to a frame index, we shouldn't count its own
+ // adjustment.
+ if (MI && InsideCallSequence)
+ SPAdj += TII.getSPAdjust(MI);
+
if (DoIncr && I != BB->end()) ++I;
// Update register states.
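
The InsideCallSequence bookkeeping above can be pictured with a toy model: frame setup/destroy pseudos open and close a call sequence, and any instruction inside it may also move SP. Kind and Inst below are made-up stand-ins for the opcode checks and TII.getSPAdjust():

#include <cstdio>
#include <vector>

enum Kind { FrameSetup, FrameDestroy, Other };
struct Inst { Kind K; int SPAdjust; };

int main() {
  std::vector<Inst> Block = {
      {FrameSetup, 16}, {Other, 8 /* e.g. a push */}, {FrameDestroy, -24}};
  int SPAdj = 0;
  bool InsideCallSequence = false;
  for (const Inst &I : Block) {
    if (I.K == FrameSetup || I.K == FrameDestroy) {
      InsideCallSequence = (I.K == FrameSetup);
      SPAdj += I.SPAdjust;
    } else if (InsideCallSequence) {
      // In the real pass this runs after eliminateFrameIndex, so an
      // instruction's own frame-index rewrite is not double counted.
      SPAdj += I.SPAdjust;
    }
    std::printf("SPAdj = %d\n", SPAdj);
  }
  return 0;
}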
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 122afd1..6b346f4 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -90,6 +90,7 @@ void RegAllocBase::allocatePhysRegs() {
// Unused registers can appear when the spiller coalesces snippets.
if (MRI->reg_nodbg_empty(VirtReg->reg)) {
DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ aboutToRemoveInterval(*VirtReg);
LIS->removeInterval(VirtReg->reg);
continue;
}
@@ -139,6 +140,7 @@ void RegAllocBase::allocatePhysRegs() {
assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ aboutToRemoveInterval(*SplitVirtReg);
LIS->removeInterval(SplitVirtReg->reg);
continue;
}
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index bbd79cd..659b8f5 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -96,6 +96,9 @@ protected:
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
+ /// Method called when the allocator is about to remove a LiveInterval.
+ virtual void aboutToRemoveInterval(LiveInterval &LI) {}
+
public:
/// VerifyEnabled - True when -verify-regalloc is given.
static bool VerifyEnabled;
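
The aboutToRemoveInterval() hook is a plain notify-before-destroy virtual: the base allocator calls it immediately before LIS->removeInterval(), and (as the RegAllocGreedy.cpp hunks below show) RAGreedy overrides it to purge SetOfBrokenHints. A minimal sketch with stub types, not the real LLVM classes:

#include <cstdio>
#include <set>

struct LiveIntervalStub { unsigned Reg; };

struct AllocatorBase {
  virtual ~AllocatorBase() = default;
  virtual void aboutToRemoveInterval(LiveIntervalStub &) {} // default no-op
  void removeInterval(LiveIntervalStub &LI) {
    aboutToRemoveInterval(LI); // notify before the interval dies
    std::printf("removing vreg %u\n", LI.Reg);
  }
};

struct GreedyLike : AllocatorBase {
  std::set<LiveIntervalStub *> BrokenHints;
  void aboutToRemoveInterval(LiveIntervalStub &LI) override {
    BrokenHints.erase(&LI); // do not keep dangling pointers around
  }
};

int main() {
  GreedyLike RA;
  LiveIntervalStub LI{5};
  RA.BrokenHints.insert(&LI);
  RA.removeInterval(LI); // hook fires first, set is left clean
  return 0;
}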
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8fc10b4..c621414 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -372,15 +372,23 @@ void RAFast::usePhysReg(MachineOperand &MO) {
case regDisabled:
break;
case regReserved:
- assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ // Either PhysReg is a subregister of Alias and we mark the
+ // whole register as free, or PhysReg is the superregister of
+ // Alias and we mark all the aliases as disabled before freeing
+ // PhysReg.
+ // In the latter case, since PhysReg was disabled, this means that
+ // its value is defined only by physical sub-registers. This check
+ // is performed by the assert of the default case in this loop.
+      // Note: The value of the superregister may only be partially
+      // defined; that is why regDisabled is a valid state for aliases.
+ assert((TRI->isSuperRegister(PhysReg, Alias) ||
+ TRI->isSuperRegister(Alias, PhysReg)) &&
"Instruction is not using a subregister of a reserved register");
- // Leave the superregister in the working set.
- PhysRegState[Alias] = regFree;
- MO.getParent()->addRegisterKilled(Alias, TRI, true);
- return;
+ // Fall through.
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -1023,8 +1031,7 @@ void RAFast::AllocateBasicBlock() {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
- regFree : regReserved);
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
continue;
}
LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 8ef5dcd..edc3294 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -296,6 +296,9 @@ class RAGreedy : public MachineFunctionPass,
/// obtained from the TargetSubtargetInfo.
bool EnableLocalReassign;
+ /// Set of broken hints that may be reconciled later because of eviction.
+ SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+
public:
RAGreedy();
@@ -311,6 +314,7 @@ public:
void enqueue(LiveInterval *LI) override;
LiveInterval *dequeue() override;
unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
+ void aboutToRemoveInterval(LiveInterval &) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -378,6 +382,24 @@ private:
SmallVirtRegSet &, unsigned);
bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
SmallVirtRegSet &, unsigned);
+ void tryHintRecoloring(LiveInterval &);
+ void tryHintsRecoloring();
+
+ /// Model the information carried by one end of a copy.
+ struct HintInfo {
+ /// The frequency of the copy.
+ BlockFrequency Freq;
+ /// The virtual register or physical register.
+ unsigned Reg;
+ /// Its currently assigned register.
+    /// In the case of a physical register, Reg == PhysReg.
+ unsigned PhysReg;
+ HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
+ : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
+ };
+ typedef SmallVector<HintInfo, 4> HintsInfo;
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
+ void collectHintInfo(unsigned, HintsInfo &);
};
} // end anonymous namespace
@@ -453,7 +475,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
if (VRM->hasPhys(VirtReg)) {
- Matrix->unassign(LIS->getInterval(VirtReg));
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
return true;
}
// Unassigned virtreg is probably in the priority queue.
@@ -2213,6 +2237,11 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
return PhysReg;
}
+void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
+ // Do not keep invalid information around.
+ SetOfBrokenHints.remove(&LI);
+}
+
void RAGreedy::initializeCSRCost() {
// We use the larger one out of the command-line option and the value report
// by TRI.
@@ -2238,6 +2267,170 @@ void RAGreedy::initializeCSRCost() {
CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}
+/// \brief Collect the hint info for \p Reg.
+/// The results are stored into \p Out.
+/// \p Out is not cleared before being populated.
+void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+ if (!Instr.isFullCopy())
+ continue;
+ // Look for the other end of the copy.
+ unsigned OtherReg = Instr.getOperand(0).getReg();
+ if (OtherReg == Reg) {
+ OtherReg = Instr.getOperand(1).getReg();
+ if (OtherReg == Reg)
+ continue;
+ }
+ // Get the current assignment.
+ unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ ? OtherReg
+ : VRM->getPhys(OtherReg);
+ // Push the collected information.
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+ OtherPhysReg));
+ }
+}
+
+/// \brief Using the given \p List, compute the cost of the broken hints if
+/// \p PhysReg was used.
+/// \return The cost of \p List for \p PhysReg.
+BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
+ unsigned PhysReg) {
+ BlockFrequency Cost = 0;
+ for (const HintInfo &Info : List) {
+ if (Info.PhysReg != PhysReg)
+ Cost += Info.Freq;
+ }
+ return Cost;
+}
+
+/// \brief Using the register assigned to \p VirtReg, try to recolor
+/// all the live ranges that are copy-related with \p VirtReg.
+/// The recoloring is then propagated to all the live-ranges that have
+/// been recolored and so on, until no more copies can be coalesced or
+/// it is not profitable.
+/// For a given live range, profitability is determined by the sum of the
+/// frequencies of the non-identity copies it would introduce with the old
+/// and new register.
+void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+  // We have a broken hint; check if it is possible to fix it by
+ // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
+ // some register and PhysReg may be available for the other live-ranges.
+ SmallSet<unsigned, 4> Visited;
+ SmallVector<unsigned, 2> RecoloringCandidates;
+ HintsInfo Info;
+ unsigned Reg = VirtReg.reg;
+ unsigned PhysReg = VRM->getPhys(Reg);
+ // Start the recoloring algorithm from the input live-interval, then
+ // it will propagate to the ones that are copy-related with it.
+ Visited.insert(Reg);
+ RecoloringCandidates.push_back(Reg);
+
+ DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
+ << PrintReg(PhysReg, TRI) << ")\n");
+
+ do {
+ Reg = RecoloringCandidates.pop_back_val();
+
+    // We cannot recolor physical registers.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+    assert(VRM->hasPhys(Reg) && "We have an unallocated variable!");
+
+ // Get the live interval mapped with this virtual register to be able
+ // to check for the interference with the new color.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ unsigned CurrPhys = VRM->getPhys(Reg);
+ // Check that the new color matches the register class constraints and
+ // that it is free for this live range.
+ if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
+ Matrix->checkInterference(LI, PhysReg)))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+ << ") is recolorable.\n");
+
+ // Gather the hint info.
+ Info.clear();
+ collectHintInfo(Reg, Info);
+ // Check if recoloring the live-range will increase the cost of the
+ // non-identity copies.
+ if (CurrPhys != PhysReg) {
+ DEBUG(dbgs() << "Checking profitability:\n");
+ BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
+ BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
+ DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+ << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
+ if (OldCopiesCost < NewCopiesCost) {
+ DEBUG(dbgs() << "=> Not profitable.\n");
+ continue;
+ }
+      // At this point, the new cost is either lower or equal. If it is
+      // equal, we consider it profitable because it may expose
+ // more recoloring opportunities.
+ DEBUG(dbgs() << "=> Profitable.\n");
+ // Recolor the live-range.
+ Matrix->unassign(LI);
+ Matrix->assign(LI, PhysReg);
+ }
+ // Push all copy-related live-ranges to keep reconciling the broken
+ // hints.
+ for (const HintInfo &HI : Info) {
+ if (Visited.insert(HI.Reg).second)
+ RecoloringCandidates.push_back(HI.Reg);
+ }
+ } while (!RecoloringCandidates.empty());
+}
+
+/// \brief Try to recolor broken hints.
+/// Broken hints may be repaired by recoloring when an evicted variable
+/// freed up a register for a larger live-range.
+/// Consider the following example:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// ...
+/// = b
+/// = a
+/// Let us assume b gets split:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// = a
+/// Because of how the allocation works, b, c, and d may be assigned different
+/// colors. Now, if a gets evicted later:
+/// BB1:
+/// a =
+/// st a, SpillSlot
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// e = ld SpillSlot
+/// = e
+/// It is likely that we can assign the same register to b, c, and d,
+/// getting rid of 2 copies.
+void RAGreedy::tryHintsRecoloring() {
+ for (LiveInterval *LI : SetOfBrokenHints) {
+ assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+ "Recoloring is possible only for virtual registers");
+ // Some dead defs may be around (e.g., because of debug uses).
+ // Ignore those.
+ if (!VRM->hasPhys(LI->reg))
+ continue;
+ tryHintRecoloring(*LI);
+ }
+}
+
unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
@@ -2274,8 +2467,18 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// queue. The RS_Split ranges already failed to do this, and they should not
// get a second chance until they have been split.
if (Stage != RS_Split)
- if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
+ if (unsigned PhysReg =
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
+      // If VirtReg has a hint and that hint is broken, record this
+      // virtual register as a recoloring candidate for broken hints.
+ // Indeed, since we evicted a variable in its neighborhood it is
+ // likely we can at least partially recolor some of the
+ // copy-related live-ranges.
+ if (Hint && Hint != PhysReg)
+ SetOfBrokenHints.insert(&VirtReg);
return PhysReg;
+ }
assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
@@ -2355,8 +2558,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
NextCascade = 1;
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
+ SetOfBrokenHints.clear();
allocatePhysRegs();
+ tryHintsRecoloring();
releaseMemory();
return true;
}
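
The profitability test in tryHintRecoloring reduces to comparing two sums of copy frequencies. A toy version of getBrokenHintFreq() and the decision it feeds, with plain unsigned standing in for BlockFrequency:

#include <cstdio>
#include <vector>

struct HintInfo { unsigned Freq, PhysReg; };

// Sum the frequencies of copies whose other end is NOT in PhysReg:
// those are the hints that stay broken if we pick PhysReg.
static unsigned brokenHintFreq(const std::vector<HintInfo> &List,
                               unsigned PhysReg) {
  unsigned Cost = 0;
  for (const HintInfo &Info : List)
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  return Cost;
}

int main() {
  std::vector<HintInfo> Info = {{100, 1}, {40, 2}, {10, 1}};
  unsigned CurrPhys = 2, Candidate = 1;
  unsigned OldCost = brokenHintFreq(Info, CurrPhys);  // 110
  unsigned NewCost = brokenHintFreq(Info, Candidate); // 40
  // Equal cost is also taken, since it may expose more recoloring.
  std::printf("recolor: %s\n", NewCost <= OldCost ? "yes" : "no");
  return 0;
}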
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index eb7e5633..77a42b3 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -126,7 +126,12 @@ private:
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
/// \brief Constructs an initial graph.
- void initializeGraph(PBQPRAGraph &G);
+ void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
+
+ /// \brief Spill the given VReg.
+ void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ Spiller &VRegSpiller);
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
@@ -172,8 +177,6 @@ public:
class Interference : public PBQPRAConstraint {
private:
-private:
-
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
@@ -308,7 +311,7 @@ private:
PBQPRAGraph::NodeId MId, IMatrixCache &C) {
const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
@@ -342,7 +345,7 @@ public:
void apply(PBQPRAGraph &G) override {
MachineFunction &MF = G.getMetadata().MF;
MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
- CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
// Scan the machine function and add a coalescing cost whenever CoalescerPair
// gives the Ok.
@@ -398,7 +401,7 @@ public:
}
PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
- G.setEdgeCosts(EId, std::move(Costs));
+ G.updateEdgeCosts(EId, std::move(Costs));
}
}
}
@@ -488,15 +491,21 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
return false;
}
-void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIS = G.getMetadata().LIS;
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ while (!Worklist.empty()) {
+ unsigned VReg = Worklist.back();
+ Worklist.pop_back();
- for (auto VReg : VRegsToAlloc) {
const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -531,6 +540,15 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
VRegAllowed.push_back(PReg);
}
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ continue;
+ }
+
PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
    // Tweak cost of callee saved registers, as using them forces spilling and
@@ -547,14 +565,40 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
}
}
+void RegAllocPBQP::spillVReg(unsigned VReg,
+ SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
+ I != E; ++I) {
+ const LiveInterval &LI = LIS.getInterval(*I);
+ assert(!LI.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+}
+
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
const PBQP::Solution &Solution,
VirtRegMap &VRM,
Spiller &VRegSpiller) {
MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIS = G.getMetadata().LIS;
- const TargetRegisterInfo &TRI =
- *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
// Set to true if we have any spills
@@ -576,28 +620,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
assert(PReg != 0 && "Invalid preg selected.");
VRM.assignVirt2Phys(VReg, PReg);
} else {
- VRegsToAlloc.erase(VReg);
- SmallVector<unsigned, 8> NewSpills;
- LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM);
- VRegSpiller.spill(LRE);
-
- DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
- << LRE.getParent().weight << ", New vregs: ");
-
- // Copy any newly inserted live intervals into the list of regs to
- // allocate.
- for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
- I != E; ++I) {
- LiveInterval &LI = LIS.getInterval(*I);
- assert(!LI.empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
- VRegsToAlloc.insert(LI.reg);
- }
-
- DEBUG(dbgs() << ")\n");
-
- // We need another round if spill intervals were added.
- AnotherRoundNeeded |= !LRE.empty();
+ // Spill VReg. If this introduces new intervals we'll need another round
+ // of allocation.
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ AnotherRoundNeeded |= !NewVRegs.empty();
}
}
@@ -670,7 +697,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// If there are non-empty intervals allocate them using pbqp.
if (!VRegsToAlloc.empty()) {
- const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl();
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
llvm::make_unique<PBQPRAConstraintList>();
ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
@@ -686,7 +713,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
- initializeGraph(G);
+ initializeGraph(G, VRM, *VRegSpiller);
ConstraintsRoot->apply(G);
#ifndef NDEBUG
@@ -699,7 +726,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
<< GraphFileName << "\"\n");
- G.dumpToStream(OS);
+ G.dump(OS);
}
#endif
@@ -719,6 +746,79 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+namespace {
+// A helper class for printing node and register info in a consistent way
+class PrintNodeInfo {
+public:
+ typedef PBQP::RegAlloc::PBQPRAGraph Graph;
+ typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId;
+
+ PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {}
+
+ void print(raw_ostream &OS) const {
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
+ OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
+ }
+
+private:
+ const Graph &G;
+ NodeId NId;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) {
+ PR.print(OS);
+ return OS;
+}
+} // anonymous namespace
+
+void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+ for (auto NId : nodeIds()) {
+ const Vector &Costs = getNodeCosts(NId);
+ assert(Costs.getLength() != 0 && "Empty vector in graph.");
+ OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
+ }
+ OS << '\n';
+
+ for (auto EId : edgeIds()) {
+ NodeId N1Id = getEdgeNode1Id(EId);
+ NodeId N2Id = getEdgeNode2Id(EId);
+ assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
+ const Matrix &M = getEdgeCosts(EId);
+ assert(M.getRows() != 0 && "No rows in matrix.");
+ assert(M.getCols() != 0 && "No cols in matrix.");
+ OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
+ OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
+ OS << M << '\n';
+ }
+}
+
+void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+
+void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
+ OS << "graph {\n";
+ for (auto NId : nodeIds()) {
+ OS << " node" << NId << " [ label=\""
+ << PrintNodeInfo(NId, *this) << "\\n"
+ << getNodeCosts(NId) << "\" ]\n";
+ }
+
+ OS << " edge [ len=" << nodeIds().size() << " ]\n";
+ for (auto EId : edgeIds()) {
+ OS << " node" << getEdgeNode1Id(EId)
+ << " -- node" << getEdgeNode2Id(EId)
+ << " [ label=\"";
+ const Matrix &EdgeCosts = getEdgeCosts(EId);
+ for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
+ OS << EdgeCosts.getRowAsVector(i) << "\\n";
+ }
+ OS << "\" ]\n";
+ }
+ OS << "}\n";
+}
+
FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
return new RegAllocPBQP(customPassID);
}
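
initializeGraph() now drains a worklist instead of iterating VRegsToAlloc directly, because pre-spilling a vreg with an empty allowed set can mint new vregs that must themselves be examined. A toy model of that loop; hasAllowedRegs and spill are hypothetical stand-ins:

#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> Worklist = {1, 2, 3};
  auto hasAllowedRegs = [](unsigned VReg) { return VReg != 2; };
  auto spill = [](unsigned VReg) { return std::vector<unsigned>{VReg + 100}; };

  while (!Worklist.empty()) {
    unsigned VReg = Worklist.back();
    Worklist.pop_back();
    if (!hasAllowedRegs(VReg)) {
      // Pre-spill: the replacement vregs go back on the worklist.
      std::vector<unsigned> NewVRegs = spill(VReg);
      Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
      continue;
    }
    std::printf("add PBQP node for vreg %u\n", VReg);
  }
  return 0;
}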
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index e0d1aa2..ab33672 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -47,6 +47,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
+ assert(TRI && "no register info set");
const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
@@ -76,6 +77,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
/// registers filtered out. Volatile registers come first followed by CSR
/// aliases ordered according to the CSR order specified by the target.
void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ assert(RC && "no register class given");
RCInfo &RCI = RegClass[RC->getID()];
// Raw register count, including all reserved regs.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2d2dc92..1e4cfe8 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -57,12 +58,12 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
-// Temporary flag to test critical edge unsplitting.
+/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
-// Temporary flag to test global copy optimization.
+/// Temporary flag to test global copy optimization.
static cl::opt<cl::boolOrDefault>
EnableGlobalCopies("join-globalcopies",
cl::desc("Coalesce copies that span blocks (default=subtarget)"),
@@ -86,6 +87,14 @@ namespace {
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
+ /// A LaneMask to remember on which subregister live ranges we need to call
+ /// shrinkToUses() later.
+ unsigned ShrinkMask;
+
+ /// True if the main range of the currently coalesced intervals should be
+ /// checked for smaller live intervals.
+ bool ShrinkMainRange;
+
/// \brief True if the coalescer should aggressively coalesce global copies
/// in favor of keeping local copies.
bool JoinGlobalCopies;
@@ -111,7 +120,7 @@ namespace {
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
- /// LiveRangeEdit callback.
+ /// LiveRangeEdit callback for eliminateDeadDefs().
void LRE_WillEraseInstruction(MachineInstr *MI) override;
/// Coalesce the LocalWorkList.
@@ -124,16 +133,15 @@ namespace {
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// Try to coalesce all copies in CurrList. Return
- /// true if any progress was made.
+ /// Tries to coalesce all copies in CurrList. Returns true if any progress
+ /// was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
- /// Attempt to join intervals corresponding to SrcReg/DstReg,
- /// which are the src/dst of the copy instruction CopyMI. This returns
- /// true if the copy was successfully coalesced away. If it is not
- /// currently possible to coalesce this interval, but it may be possible if
- /// other things get coalesced, then it returns true by reference in
- /// 'Again'.
+ /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
+ /// src/dst of the copy instruction CopyMI. This returns true if the copy
+ /// was successfully coalesced away. If it is not currently possible to
+ /// coalesce this interval, but it may be possible if other things get
+ /// coalesced, then it returns true by reference in 'Again'.
bool joinCopy(MachineInstr *TheCopy, bool &Again);
/// Attempt to join these two intervals. On failure, this
@@ -147,10 +155,23 @@ namespace {
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
- /// We found a non-trivially-coalescable copy. If
- /// the source value number is defined by a copy from the destination reg
- /// see if we can merge these two destination reg valno# into a single
- /// value number, eliminating a copy.
+ /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
+ /// Subranges in @p LI which only partially interfere with the desired
+ /// LaneMask are split as necessary. @p LaneMask are the lanes that
+ /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
+ /// lanemasks already adjusted to the coalesced register.
+ void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ unsigned LaneMask, CoalescerPair &CP);
+
+ /// Join the liveranges of two subregisters. Joins @p RRange into
+    /// @p LRange; @p RRange may be invalid afterwards.
+ void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ unsigned LaneMask, const CoalescerPair &CP);
+
+ /// We found a non-trivially-coalescable copy. If the source value number is
+ /// defined by a copy from the destination reg see if we can merge these two
+ /// destination reg valno# into a single value number, eliminating a copy.
+ /// This returns true if an interval was modified.
bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
/// Return true if there are definitions of IntB
@@ -162,6 +183,7 @@ namespace {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
+ /// This returns true if an interval was modified.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
/// If the source of a copy is defined by a
@@ -169,21 +191,21 @@ namespace {
bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
bool &IsDefCopy);
- /// Return true if a physreg copy should be joined.
+ /// Return true if a copy involving a physreg should be joined.
bool canJoinPhys(const CoalescerPair &CP);
- /// Replace all defs and uses of SrcReg to DstReg and
- /// update the subregister number if it is not zero. If DstReg is a
- /// physical register and the existing subregister number of the def / use
- /// being updated is not zero, make sure to set it to the correct physical
- /// subregister.
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
/// Handle copies of undef values.
- bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+ /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
+ bool eliminateUndefCopy(MachineInstr *CopyMI);
public:
- static char ID; // Class identification, replacement for typeinfo
+ static char ID; ///< Class identification, replacement for typeinfo
RegisterCoalescer() : MachineFunctionPass(ID) {
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
}
@@ -198,7 +220,7 @@ namespace {
/// Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
-} /// end anonymous namespace
+} // end anonymous namespace
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
@@ -232,11 +254,11 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
return true;
}
-// Return true if this block should be vacated by the coalescer to eliminate
-// branches. The important cases to handle in the coalescer are critical edges
-// split during phi elimination which contain only copies. Simple blocks that
-// contain non-branches should also be vacated, but this can be handled by an
-// earlier pass similar to early if-conversion.
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
static bool isSplitEdge(const MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
return false;
@@ -401,27 +423,11 @@ void RegisterCoalescer::eliminateDeadDefs() {
nullptr, this).eliminateDeadDefs(DeadDefs);
}
-// Callback from eliminateDeadDefs().
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
// MI may be in WorkList. Make sure we don't visit it.
ErasedInstrs.insert(MI);
}
-/// We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy. For example:
-///
-/// A3 = B0
-/// ...
-/// B1 = A3 <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-///
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
MachineInstr *CopyMI) {
assert(!CP.isPartial() && "This doesn't work for partial copies.");
@@ -433,6 +439,20 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ // We have a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a copy from B, see if we can
+ // merge these two pieces of B into a single value number, eliminating a copy.
+ // For example:
+ //
+ // A3 = B0
+ // ...
+ // B1 = A3 <- this copy
+ //
+ // In this case, B0 can be extended to where the B1 copy lives, allowing the
+ // B1 value number to be replaced with B0 (which simplifies the B
+ // liveinterval).
+
// BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
@@ -492,6 +512,16 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// Okay, merge "B1" into the same value number as "B0".
if (BValNo != ValS->valno)
IntB.MergeValueNumberInto(BValNo, ValS->valno);
+
+ // Do the same for the subregister segments.
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
+ VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
+ if (SubBValNo != SubValSNo)
+ S.MergeValueNumberInto(SubBValNo, SubValSNo);
+ }
+
DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
@@ -512,8 +542,6 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
return true;
}
-/// Return true if there are definitions of IntB
-/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
VNInfo *AValNo,
@@ -523,69 +551,75 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
if (LIS->hasPHIKill(IntA, AValNo))
return true;
- for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
- AI != AE; ++AI) {
- if (AI->valno != AValNo) continue;
+ for (LiveRange::Segment &ASeg : IntA.segments) {
+ if (ASeg.valno != AValNo) continue;
LiveInterval::iterator BI =
- std::upper_bound(IntB.begin(), IntB.end(), AI->start);
+ std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
if (BI != IntB.begin())
--BI;
- for (; BI != IntB.end() && AI->end >= BI->start; ++BI) {
+ for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
if (BI->valno == BValNo)
continue;
- if (BI->start <= AI->start && BI->end > AI->start)
+ if (BI->start <= ASeg.start && BI->end > ASeg.start)
return true;
- if (BI->start > AI->start && BI->start < AI->end)
+ if (BI->start > ASeg.start && BI->start < ASeg.end)
return true;
}
}
return false;
}
-/// We found a non-trivially-coalescable copy with
-/// IntA being the source and IntB being the dest, thus this defines a value
-/// number in IntB. If the source value number (in IntA) is defined by a
-/// commutable instruction and its other operand is coalesced to the copy dest
-/// register, see if we can transform the copy into a noop by commuting the
-/// definition. For example,
-///
-/// A3 = op A2 B0<kill>
-/// ...
-/// B1 = A3 <- this copy
-/// ...
-/// = op A3 <- more uses
-///
-/// ==>
-///
-/// B2 = op B0 A2<kill>
-/// ...
-/// B1 = B2 <- now an identify copy
-/// ...
-/// = op B2 <- more uses
-///
-/// This returns true if an interval was modified.
-///
+/// Copy segments with value number @p SrcValNo from live range @p Src to live
+/// range @p Dst and use value number @p DstValNo there.
+static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
+ const LiveRange &Src, const VNInfo *SrcValNo)
+{
+ for (const LiveRange::Segment &S : Src.segments) {
+ if (S.valno != SrcValNo)
+ continue;
+ Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
+ }
+}
+
bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *CopyMI) {
- assert (!CP.isPhys());
-
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ assert(!CP.isPhys());
LiveInterval &IntA =
- LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
- LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // We found a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a commutable instruction and
+ // its other operand is coalesced to the copy dest register, see if we can
+ // transform the copy into a noop by commuting the definition. For example,
+ //
+ // A3 = op A2 B0<kill>
+ // ...
+ // B1 = A3 <- this copy
+ // ...
+ // = op A3 <- more uses
+ //
+ // ==>
+ //
+ // B2 = op B0 A2<kill>
+ // ...
+ // B1 = B2 <- now an identity copy
+ // ...
+ // = op B2 <- more uses
// BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
- if (!BValNo || BValNo->def != CopyIdx)
- return false;
+ assert(BValNo != nullptr && BValNo->def == CopyIdx);
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
- assert(AValNo && "COPY source not live");
- if (AValNo->isPHIDef() || AValNo->isUnused())
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (AValNo->isPHIDef())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -652,8 +686,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
- unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
- NewMI->getOperand(OpIdx).setIsKill();
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
// A = or A, B
@@ -666,10 +698,13 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// Update uses of IntA of the specific Val# with IntB.
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
- UE = MRI->use_end(); UI != UE;) {
+ UE = MRI->use_end();
+ UI != UE; /* ++UI is below because of possible MI removal */) {
MachineOperand &UseMO = *UI;
- MachineInstr *UseMI = UseMO.getParent();
++UI;
+ if (UseMO.isUndef())
+ continue;
+ MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
// FIXME These don't have an instruction index. Not clear we have enough
// info to decide whether to do this replacement or not. For now do it.
@@ -678,7 +713,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
}
SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
- if (US == IntA.end() || US->valno != AValNo)
+ assert(US != IntA.end() && "Use must be live");
+ if (US->valno != AValNo)
continue;
// Kill flags are no longer accurate. They are recomputed after RA.
UseMO.setIsKill(false);
@@ -702,7 +738,16 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
assert(DVNI->def == DefIdx);
- BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
+ if (!SubDVNI)
+ continue;
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ assert(SubBValNo->def == CopyIdx);
+ S.MergeValueNumberInto(SubDVNI, SubBValNo);
+ }
+
ErasedInstrs.insert(UseMI);
LIS->RemoveMachineInstrFromMaps(UseMI);
UseMI->eraseFromParent();
@@ -710,23 +755,82 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// Extend BValNo by merging in IntA live segments of AValNo. Val# definition
// is updated.
- VNInfo *ValNo = BValNo;
- ValNo->def = AValNo->def;
- for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
- AI != AE; ++AI) {
- if (AI->valno != AValNo) continue;
- IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo));
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ if (IntB.hasSubRanges()) {
+ if (!IntA.hasSubRanges()) {
+ unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ }
+ SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ for (LiveInterval::SubRange &SA : IntA.subranges()) {
+ VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
+ assert(ASubValNo != nullptr);
+
+ unsigned AMask = SA.LaneMask;
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ unsigned BMask = SB.LaneMask;
+ unsigned Common = BMask & AMask;
+ if (Common == 0)
+ continue;
+
+ DEBUG(
+ dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common));
+ unsigned BRest = BMask & ~AMask;
+ LiveInterval::SubRange *CommonRange;
+ if (BRest != 0) {
+ SB.LaneMask = BRest;
+ DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest));
+ // Duplicate the subrange for the newly merged common lanes.
+ CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
+ } else {
+ // We can reuse the existing SubRange.
+ SB.LaneMask = Common;
+ CommonRange = &SB;
+ }
+ LiveRange RangeCopy(SB, Allocator);
+
+ VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
+ assert(BSubValNo->def == CopyIdx);
+ BSubValNo->def = ASubValNo->def;
+ addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
+ AMask &= ~BMask;
+ }
+ if (AMask != 0) {
+ DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask));
+ LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
+ VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
+ addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
+ }
+ }
}
+
+ BValNo->def = AValNo->def;
+ addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
- IntA.removeValNo(AValNo);
+ LIS->removeVRegDefAt(IntA, AValNo->def);
+
DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
return true;
}
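
The subrange loop above repeats a lane-mask partitioning idiom that also appears later in mergeSubRangeInto(): split each existing mask against the incoming one into a common part (merged in place or as a duplicate) and a remainder (kept separate), then clear the handled lanes; whatever is left becomes a new subrange. A minimal standalone sketch of just that partitioning, in plain C++ with hypothetical names rather than the LLVM API:

    #include <cstdio>
    #include <vector>

    // Simplified model: a subrange is reduced to its lane mask. The names are
    // hypothetical; this is not the LLVM API.
    struct SubRange { unsigned LaneMask; };

    // Mirrors the Common/BRest/new-lane logic in removeCopyByCommutingDef():
    // partition AMask against the existing subranges.
    static void partitionLanes(std::vector<SubRange> &Ranges, unsigned AMask) {
      std::vector<SubRange> Split;
      for (SubRange &SB : Ranges) {
        unsigned Common = SB.LaneMask & AMask;
        if (Common == 0)
          continue;                      // no overlap, subrange untouched
        unsigned Rest = SB.LaneMask & ~AMask;
        if (Rest != 0) {
          SB.LaneMask = Rest;            // shrink the old subrange...
          Split.push_back({Common});     // ...and split off the common lanes
        } else {
          SB.LaneMask = Common;          // fully covered: reuse it in place
        }
        AMask &= ~Common;                // these lanes are handled now
      }
      if (AMask != 0)
        Split.push_back({AMask});        // leftover lanes get a new subrange
      Ranges.insert(Ranges.end(), Split.begin(), Split.end());
    }

    int main() {
      std::vector<SubRange> Ranges = {{0x0003}, {0x000C}};
      partitionLanes(Ranges, 0x000E);
      for (const SubRange &R : Ranges)
        std::printf("%04X\n", R.LaneMask);   // 0001, 000C, 0002
    }

The same partition reappears almost verbatim in mergeSubRangeInto() further down.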
-/// If the source of a copy is defined by a trivial
-/// computation, replace the copy by rematerialize the definition.
+/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
+/// defining a subregister.
+static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "This code cannot handle physreg aliasing");
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ // Return true if we define the full register or don't care about the value
+ // inside other subregisters.
+ if (Op.getSubReg() == 0 || Op.isUndef())
+ return true;
+ }
+ return false;
+}
+
bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI,
bool &IsDefCopy) {
@@ -755,6 +859,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
+ if (!definesFullReg(*DefMI, SrcReg))
+ return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
@@ -825,12 +931,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
const TargetRegisterClass *NewRC = CP.getNewRC();
unsigned NewIdx = NewMI->getOperand(0).getSubReg();
- if (NewIdx)
- NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
- else
- NewRC = TRI->getCommonSubClass(NewRC, DefRC);
-
- assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ if (DefRC != nullptr) {
+ if (NewIdx)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
+ else
+ NewRC = TRI->getCommonSubClass(NewRC, DefRC);
+ assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ }
MRI->setRegClass(DstReg, NewRC);
updateRegDefsUses(DstReg, DstReg, DstIdx);
@@ -898,56 +1005,103 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
// The source interval can become smaller because we removed a use.
LIS->shrinkToUses(&SrcInt, &DeadDefs);
- if (!DeadDefs.empty())
+ if (!DeadDefs.empty()) {
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI->isDebugValue()) {
+ UseMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
+ }
+ }
eliminateDeadDefs();
+ }
return true;
}
-/// ProcessImpicitDefs may leave some copies of <undef>
-/// values, it only removes local variables. When we have a copy like:
-///
-/// %vreg1 = COPY %vreg2<undef>
-///
-/// We delete the copy and remove the corresponding value number from %vreg1.
-/// Any uses of that value number are marked as <undef>.
-bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
- const CoalescerPair &CP) {
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only removes
+ // local variables. When we have a copy like:
+ //
+ // %vreg1 = COPY %vreg2<undef>
+ //
+ // We delete the copy and remove the corresponding value number from %vreg1.
+ // Any uses of that value number are marked as <undef>.
+
+ // Note that we do not query CoalescerPair here but redo isMoveInstr as the
+ // CoalescerPair may have a new register class with adjusted subreg indices
+ // at this point.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+
SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
- LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
- if (SrcInt->liveAt(Idx))
- return false;
- LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
- if (DstInt->liveAt(Idx))
+ const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+ // CopyMI is undef iff SrcReg is not live before the instruction.
+ if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
+ unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+ for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
+ if ((SR.LaneMask & SrcMask) == 0)
+ continue;
+ if (SR.liveAt(Idx))
+ return false;
+ }
+ } else if (SrcLI.liveAt(Idx))
return false;
- // No intervals are live-in to CopyMI - it is undef.
- if (CP.isFlipped())
- DstInt = SrcInt;
- SrcInt = nullptr;
+ DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
- VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
- assert(DeadVNI && "No value defined in DstInt");
- DstInt->removeValNo(DeadVNI);
+ // Remove any DstReg segments starting at the instruction.
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ SlotIndex RegIndex = Idx.getRegSlot();
+ // Remove value or merge with previous one in case of a subregister def.
+ if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
- // Find new undef uses.
- for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) {
- if (MO.isDef() || MO.isUndef())
+ // The affected subregister segments can be removed.
+ unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask) == 0)
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
+
+ // Mark uses as undef.
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
+ if (MO.isDef() /*|| MO.isUndef()*/)
continue;
- MachineInstr *MI = MO.getParent();
- SlotIndex Idx = LIS->getInstructionIndex(MI);
- if (DstInt->liveAt(Idx))
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex UseIdx = LIS->getInstructionIndex(&MI);
+ unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ bool isLive;
+ if (UseMask != ~0u && DstLI.hasSubRanges()) {
+ isLive = false;
+ for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & UseMask) == 0)
+ continue;
+ if (SR.liveAt(UseIdx)) {
+ isLive = true;
+ break;
+ }
+ }
+ } else
+ isLive = DstLI.liveAt(UseIdx);
+ if (isLive)
continue;
MO.setIsUndef(true);
- DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+ DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
}
return true;
}
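
The liveness test at the top of eliminateUndefCopy() and the undef-marking loop at its end both reduce to the same question: is any lane read at this index live? A self-contained sketch of that per-lane check, using a toy liveness model (sets of slot indices) instead of LiveInterval; the names and types here are illustrative only:

    #include <set>
    #include <vector>

    // Toy liveness model: a lane mask plus the set of slot indices where the
    // lane carries a defined value. Illustrative only, not the LLVM types.
    struct SubRange {
      unsigned LaneMask;
      std::set<int> Live;
      bool liveAt(int Idx) const { return Live.count(Idx) != 0; }
    };

    struct Interval {
      std::set<int> MainLive;
      std::vector<SubRange> Subs;
      bool liveAt(int Idx) const { return MainLive.count(Idx) != 0; }
    };

    // Mirrors the check in eliminateUndefCopy(): the copy reads an undefined
    // value iff no lane in SrcMask is live at the copy's index.
    static bool copyReadsUndef(const Interval &Src, unsigned SrcMask, int CopyIdx) {
      if (SrcMask != 0 && !Src.Subs.empty()) {
        for (const SubRange &SR : Src.Subs) {
          if ((SR.LaneMask & SrcMask) == 0)
            continue;               // lane not read by this copy
          if (SR.liveAt(CopyIdx))
            return false;           // a read lane carries a defined value
        }
        return true;                // every read lane is dead -> undef copy
      }
      return !Src.liveAt(CopyIdx);  // no subranges: fall back to main range
    }

    int main() {
      Interval Src;
      Src.Subs = {{0x1, {5}}, {0x2, {}}};           // lane 1 live at 5 only
      return copyReadsUndef(Src, 0x2, 5) ? 0 : 1;   // reads only the dead lane
    }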
-/// Replace all defs and uses of SrcReg to DstReg and update the subregister
-/// number if it is not zero. If DstReg is a physical register and the existing
-/// subregister number of the def / use being updated is not zero, make sure to
-/// set it to the correct physical subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
@@ -987,6 +1141,40 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
if (SubIdx && MO.isDef())
MO.setIsUndef(!Reads);
+ // A subreg use of a partially undef (super) register may now be a completely
+ // undef use; if so, it has to be marked that way.
+ if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) {
+ if (!DstInt->hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
+ }
+ unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx);
+ bool IsUndef = true;
+ SlotIndex MIIdx = UseMI->isDebugValue()
+ ? LIS->getSlotIndexes()->getIndexBefore(UseMI)
+ : LIS->getInstructionIndex(UseMI);
+ SlotIndex UseIdx = MIIdx.getRegSlot(true);
+ for (LiveInterval::SubRange &S : DstInt->subranges()) {
+ if ((S.LaneMask & Mask) == 0)
+ continue;
+ if (S.liveAt(UseIdx)) {
+ IsUndef = false;
+ break;
+ }
+ }
+ if (IsUndef) {
+ MO.setIsUndef(true);
+ // We found that a subregister use is actually reading an undefined
+ // value. In some cases the whole vreg has become undefined at this
+ // point, so we may have to shrink the main range if the use was
+ // ending a live segment there.
+ LiveQueryResult Q = DstInt->Query(MIIdx);
+ if (Q.valueOut() == nullptr)
+ ShrinkMainRange = true;
+ }
+ }
+
if (DstIsPhys)
MO.substPhysReg(DstReg, *TRI);
else
@@ -1002,29 +1190,23 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
}
-/// Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
- /// Always join simple intervals that are defined by a single copy from a
- /// reserved register. This doesn't increase register pressure, so it is
- /// always beneficial.
+ // Always join simple intervals that are defined by a single copy from a
+ // reserved register. This doesn't increase register pressure, so it is
+ // always beneficial.
if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
- if (CP.isFlipped() && JoinVInt.containsOneValue())
+ if (JoinVInt.containsOneValue())
return true;
- DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
return false;
}
-/// Attempt to join intervals corresponding to SrcReg/DstReg,
-/// which are the src/dst of the copy instruction CopyMI. This returns true
-/// if the copy was successfully coalesced away. If it is not currently
-/// possible to coalesce this interval, but it may be possible if other
-/// things get coalesced, then it returns true by reference in 'Again'.
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
@@ -1063,8 +1245,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
// Eliminate undefs.
- if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
- DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
LIS->RemoveMachineInstrFromMaps(CopyMI);
CopyMI->eraseFromParent();
return false; // Not coalescable.
@@ -1076,12 +1257,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (CP.getSrcReg() == CP.getDstReg()) {
LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
- LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI));
+ const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI);
+ LiveQueryResult LRQ = LI.Query(CopyIdx);
if (VNInfo *DefVNI = LRQ.valueDefined()) {
VNInfo *ReadVNI = LRQ.valueIn();
assert(ReadVNI && "No value before copy and no <undef> flag.");
assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
LI.MergeValueNumberInto(DefVNI, ReadVNI);
+
+ // Process subregister liveranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult SLRQ = S.Query(CopyIdx);
+ if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
+ VNInfo *SReadVNI = SLRQ.valueIn();
+ S.MergeValueNumberInto(SDefVNI, SReadVNI);
+ }
+ }
DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
LIS->RemoveMachineInstrFromMaps(CopyMI);
@@ -1124,6 +1315,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
}
+ ShrinkMask = 0;
+ ShrinkMainRange = false;
+
// Okay, attempt to join these two intervals. On failure, this returns false.
// Otherwise, if one of the intervals being joined is a physreg, this method
// always canonicalizes DstInt to be it. The output "SrcInt" will not have
@@ -1178,12 +1372,28 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ // Shrink subregister ranges if necessary.
+ if (ShrinkMask != 0) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & ShrinkMask) == 0)
+ continue;
+ DEBUG(dbgs() << "Shrink LaneUses (Lane "
+ << format("%04X", S.LaneMask) << ")\n");
+ LIS->shrinkToUses(S, LI.reg);
+ }
+ }
+ if (ShrinkMainRange) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ LIS->shrinkToUses(&LI);
+ }
+
// SrcReg is guaranteed to be the register whose live interval that is
// being merged.
LIS->removeInterval(CP.getSrcReg());
// Update regalloc hint.
- TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+ TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
@@ -1200,24 +1410,23 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return true;
}
-/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ unsigned DstReg = CP.getDstReg();
assert(CP.isPhys() && "Must be a physreg copy");
- assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(DstReg) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
- assert(CP.isFlipped() && RHS.containsOneValue() &&
- "Invalid join with reserved register");
+ assert(RHS.containsOneValue() && "Invalid join with reserved register");
// Optimization for reserved registers like ESP. We can only merge with a
- // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
+ // reserved physreg if RHS has a single value that is a copy of DstReg.
// The live range of the reserved register will look like a set of dead defs
// - we don't properly track the live range of reserved registers.
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
- for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
if (RHS.overlaps(LIS->getRegUnit(*UI))) {
DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
return false;
@@ -1229,7 +1438,46 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// defs are there.
// Delete the identity copy.
- MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ MachineInstr *CopyMI;
+ if (CP.isFlipped()) {
+ CopyMI = MRI->getVRegDef(RHS.reg);
+ } else {
+ if (!MRI->hasOneNonDBGUse(RHS.reg)) {
+ DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
+ return false;
+ }
+
+ MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
+ const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+
+ // We checked above that there are no interfering defs of the physical
+ // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
+ return false;
+ }
+ }
+
+ // We're going to remove the copy which defines a physical reserved
+ // register, so remove its valno, etc.
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at "
+ << CopyRegIdx << "\n");
+
+ LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+ // Create a new dead def at the new def location.
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ LiveRange &LR = LIS->getRegUnit(*UI);
+ LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
+ }
+ }
+
LIS->RemoveMachineInstrFromMaps(CopyMI);
CopyMI->eraseFromParent();
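
Since the non-flipped path above effectively moves the physreg def from the copy up to DestMI, it must also reject any read of DstReg between the two points. That scan is a plain walk over non-null slot indexes; a toy version under a simplified "slot to regs read" model (names hypothetical):

    #include <map>
    #include <set>

    // Toy program: slot index -> set of physregs that instruction reads.
    using Program = std::map<int, std::set<unsigned>>;

    // Mirrors the scan in joinReservedPhysReg(): visit every instruction in
    // (DefIdx, CopyIdx) and refuse the join if any of them reads DstReg.
    static bool canMoveDefUp(const Program &P, int DefIdx, int CopyIdx,
                             unsigned DstReg) {
      auto It = P.upper_bound(DefIdx);      // first instruction after the def
      for (; It != P.end() && It->first < CopyIdx; ++It)
        if (It->second.count(DstReg))
          return false;                     // interfering read of DstReg
      return true;
    }

    int main() {
      Program P = {{10, {}}, {20, {7}}, {30, {}}};   // slot 20 reads physreg 7
      return (!canMoveDefUp(P, 10, 30, 7) && canMoveDefUp(P, 10, 20, 7)) ? 0 : 1;
    }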
@@ -1306,15 +1554,29 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
namespace {
/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
-/// side of the CoalescerPair.
+/// side of the CoalescerPair (or one for each lane of a side of the coalescer
+/// pair).
class JoinVals {
- LiveInterval &LI;
-
- // Location of this register in the final joined register.
- // Either CP.DstIdx or CP.SrcIdx.
- unsigned SubIdx;
-
- // Values that will be present in the final live range.
+ /// Live range we work on.
+ LiveRange &LR;
+ /// (Main) register we work on.
+ const unsigned Reg;
+
+ /// Reg (and therefore the values in this live range) will end up as
+ /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
+ /// CP.SrcIdx.
+ const unsigned SubIdx;
+ /// The LaneMask that this live range will occupy in the coalesced register. May
+ /// be smaller than the lane mask produced by SubIdx when merging subranges.
+ const unsigned LaneMask;
+
+ /// This is true when joining subregister ranges, false when joining main
+ /// ranges.
+ const bool SubRangeJoin;
+ /// Whether the current LiveInterval tracks subregister liveness.
+ const bool TrackSubRegLiveness;
+
+ /// Values that will be present in the final live range.
SmallVectorImpl<VNInfo*> &NewVNInfo;
const CoalescerPair &CP;
@@ -1322,75 +1584,75 @@ class JoinVals {
SlotIndexes *Indexes;
const TargetRegisterInfo *TRI;
- // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
- // This is suitable for passing to LiveInterval::join().
+ /// Value number assignments. Maps value numbers in LI to entries in
+ /// NewVNInfo. This is suitable for passing to LiveInterval::join().
SmallVector<int, 8> Assignments;
- // Conflict resolution for overlapping values.
+ /// Conflict resolution for overlapping values.
enum ConflictResolution {
- // No overlap, simply keep this value.
+ /// No overlap, simply keep this value.
CR_Keep,
- // Merge this value into OtherVNI and erase the defining instruction.
- // Used for IMPLICIT_DEF, coalescable copies, and copies from external
- // values.
+ /// Merge this value into OtherVNI and erase the defining instruction.
+ /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ /// values.
CR_Erase,
- // Merge this value into OtherVNI but keep the defining instruction.
- // This is for the special case where OtherVNI is defined by the same
- // instruction.
+ /// Merge this value into OtherVNI but keep the defining instruction.
+ /// This is for the special case where OtherVNI is defined by the same
+ /// instruction.
CR_Merge,
- // Keep this value, and have it replace OtherVNI where possible. This
- // complicates value mapping since OtherVNI maps to two different values
- // before and after this def.
- // Used when clobbering undefined or dead lanes.
+ /// Keep this value, and have it replace OtherVNI where possible. This
+ /// complicates value mapping since OtherVNI maps to two different values
+ /// before and after this def.
+ /// Used when clobbering undefined or dead lanes.
CR_Replace,
- // Unresolved conflict. Visit later when all values have been mapped.
+ /// Unresolved conflict. Visit later when all values have been mapped.
CR_Unresolved,
- // Unresolvable conflict. Abort the join.
+ /// Unresolvable conflict. Abort the join.
CR_Impossible
};
- // Per-value info for LI. The lane bit masks are all relative to the final
- // joined register, so they can be compared directly between SrcReg and
- // DstReg.
+ /// Per-value info for LI. The lane bit masks are all relative to the final
+ /// joined register, so they can be compared directly between SrcReg and
+ /// DstReg.
struct Val {
ConflictResolution Resolution;
- // Lanes written by this def, 0 for unanalyzed values.
+ /// Lanes written by this def, 0 for unanalyzed values.
unsigned WriteLanes;
- // Lanes with defined values in this register. Other lanes are undef and
- // safe to clobber.
+ /// Lanes with defined values in this register. Other lanes are undef and
+ /// safe to clobber.
unsigned ValidLanes;
- // Value in LI being redefined by this def.
+ /// Value in LI being redefined by this def.
VNInfo *RedefVNI;
- // Value in the other live range that overlaps this def, if any.
+ /// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
- // Is this value an IMPLICIT_DEF that can be erased?
- //
- // IMPLICIT_DEF values should only exist at the end of a basic block that
- // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
- // safely erased if they are overlapping a live value in the other live
- // interval.
- //
- // Weird control flow graphs and incomplete PHI handling in
- // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
- // longer live ranges. Such IMPLICIT_DEF values should be treated like
- // normal values.
+ /// Is this value an IMPLICIT_DEF that can be erased?
+ ///
+ /// IMPLICIT_DEF values should only exist at the end of a basic block that
+ /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ /// safely erased if they are overlapping a live value in the other live
+ /// interval.
+ ///
+ /// Weird control flow graphs and incomplete PHI handling in
+ /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ /// longer live ranges. Such IMPLICIT_DEF values should be treated like
+ /// normal values.
bool ErasableImplicitDef;
- // True when the live range of this value will be pruned because of an
- // overlapping CR_Replace value in the other live range.
+ /// True when the live range of this value will be pruned because of an
+ /// overlapping CR_Replace value in the other live range.
bool Pruned;
- // True once Pruned above has been computed.
+ /// True once Pruned above has been computed.
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
@@ -1400,30 +1662,75 @@ class JoinVals {
bool isAnalyzed() const { return WriteLanes != 0; }
};
- // One entry per value number in LI.
+ /// One entry per value number in LI.
SmallVector<Val, 8> Vals;
- unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
- VNInfo *stripCopies(VNInfo *VNI);
+ /// Compute the bitmask of lanes actually written by DefMI.
+ /// Set Redef if there are any partial register definitions that depend on the
+ /// previous value of the register.
+ unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+
+ /// Find the ultimate value that VNI was copied from.
+ std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+
+ bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
+
+ /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+ /// Return a conflict resolution when possible, but leave the hard cases as
+ /// CR_Unresolved.
+ /// Recursively calls computeAssignment() on this and Other, guaranteeing that
+ /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+ /// The recursion always goes upwards in the dominator tree, making loops
+ /// impossible.
ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+
+ /// Compute the value assignment for ValNo in RI.
+ /// This may be called recursively by analyzeValue(), but never for a ValNo on
+ /// the stack.
void computeAssignment(unsigned ValNo, JoinVals &Other);
+
+ /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
+ /// the extent of the tainted lanes in the block.
+ ///
+ /// Multiple values in Other.LR can be affected since partial redefinitions
+ /// can preserve previously tainted lanes.
+ ///
+ /// 1 %dst = VLOAD <-- Define all lanes in %dst
+ /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+ /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+ /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+ ///
+ /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+ /// entry to TaintedVals.
+ ///
+ /// Returns false if the tainted lanes extend beyond the basic block.
bool taintExtent(unsigned, unsigned, JoinVals&,
SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
- bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+
+ /// Return true if MI uses any of the given Lanes from Reg.
+ /// This does not include partial redefinitions of Reg.
+ bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+
+ /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
+ /// be pruned:
+ ///
+ /// %dst = COPY %src
+ /// %src = COPY %dst <-- This value to be pruned.
+ /// %dst = COPY %src <-- This value is a copy of a pruned value.
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
- JoinVals(LiveInterval &li, unsigned subIdx,
- SmallVectorImpl<VNInfo*> &newVNInfo,
- const CoalescerPair &cp,
- LiveIntervals *lis,
- const TargetRegisterInfo *tri)
- : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
- Indexes(LIS->getSlotIndexes()), TRI(tri),
- Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+ JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+ SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
+ LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
+ bool TrackSubRegLiveness)
+ : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+ SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+ NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+ TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums())
{}
- /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+ /// Analyze defs in LR and compute a value mapping in NewVNInfo.
/// Returns false if any conflicts were impossible to resolve.
bool mapValues(JoinVals &Other);
@@ -1431,10 +1738,16 @@ public:
/// Returns false if any conflicts were impossible to resolve.
bool resolveConflicts(JoinVals &Other);
- /// Prune the live range of values in Other.LI where they would conflict with
- /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// Prune the live range of values in Other.LR where they would conflict with
+ /// CR_Replace values in LR. Collect end points for restoring the live range
/// after joining.
- void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs);
+
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
+ void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
@@ -1448,13 +1761,11 @@ public:
};
} // end anonymous namespace
-/// Compute the bitmask of lanes actually written by DefMI.
-/// Set Redef if there are any partial register definitions that depend on the
-/// previous value of the register.
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+ const {
unsigned L = 0;
for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+ if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef())
continue;
L |= TRI->getSubRegIndexLaneMask(
TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
@@ -1464,36 +1775,71 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
return L;
}
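
computeWriteLanes() just ORs together the lane masks of every def of Reg on the instruction and flags Redef when a partial def also reads the register (i.e. lacks <read-undef>). A distilled sketch, ignoring the SubIdx composition and using a made-up lane-mask table:

    #include <vector>

    // Toy def operand: which subreg index it writes and whether it also reads
    // the old value of the register (a partial def without <read-undef>).
    struct DefOp {
      unsigned SubIdx;       // 0 = full register
      bool ReadsOldValue;
    };

    // Made-up lane-mask table; index 0 stands for the full register.
    static unsigned laneMask(unsigned SubIdx) {
      static const unsigned Table[] = {0xF, 0x3, 0xC};
      return Table[SubIdx];
    }

    // Mirrors JoinVals::computeWriteLanes(): union of written lanes; Redef is
    // set when a partial def depends on the previous register value.
    static unsigned writeLanes(const std::vector<DefOp> &Defs, bool &Redef) {
      unsigned L = 0;
      for (const DefOp &Op : Defs) {
        L |= laneMask(Op.SubIdx);
        if (Op.SubIdx != 0 && Op.ReadsOldValue)
          Redef = true;
      }
      return L;
    }

    int main() {
      bool Redef = false;
      std::vector<DefOp> Defs = {{1, true}};   // one partial, read-modify def
      return (writeLanes(Defs, Redef) == 0x3 && Redef) ? 0 : 1;
    }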
-/// Find the ultimate value that VNI was copied from.
-VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
+ const VNInfo *VNI) const {
+ unsigned Reg = this->Reg;
+
while (!VNI->isPHIDef()) {
- MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
assert(MI && "No defining instruction");
if (!MI->isFullCopy())
+ return std::make_pair(VNI, Reg);
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return std::make_pair(VNI, Reg);
+
+ const LiveInterval &LI = LIS->getInterval(SrcReg);
+ const VNInfo *ValueIn;
+ // No subrange involved.
+ if (!SubRangeJoin || !LI.hasSubRanges()) {
+ LiveQueryResult LRQ = LI.Query(Def);
+ ValueIn = LRQ.valueIn();
+ } else {
+ // Query subranges. Pick the first matching one.
+ ValueIn = nullptr;
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ // Transform lanemask to a mask in the joined live interval.
+ unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+ if ((SMask & LaneMask) == 0)
+ continue;
+ LiveQueryResult LRQ = S.Query(Def);
+ ValueIn = LRQ.valueIn();
+ break;
+ }
+ }
+ if (ValueIn == nullptr)
break;
- unsigned Reg = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- break;
- LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def);
- if (!LRQ.valueIn())
- break;
- VNI = LRQ.valueIn();
+ VNI = ValueIn;
+ Reg = SrcReg;
}
- return VNI;
+ return std::make_pair(VNI, Reg);
+}
+
+bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
+ const JoinVals &Other) const {
+ const VNInfo *Orig0;
+ unsigned Reg0;
+ std::tie(Orig0, Reg0) = followCopyChain(Value0);
+ if (Orig0 == Value1)
+ return true;
+
+ const VNInfo *Orig1;
+ unsigned Reg1;
+ std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
+
+ // The values are equal if they are defined at the same place and use the
+ // same register. Note that we cannot compare VNInfos directly as some of
+ // them might be from a copy created in mergeSubRangeInto() while the other
+ // is from the original LiveInterval.
+ return Orig0->def == Orig1->def && Reg0 == Reg1;
}
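
followCopyChain()/valuesIdentical() amount to walking each value back through full copies until a non-copy def is reached, and declaring two values identical when both walks end at the same (def slot, register) pair, which is exactly why plain VNInfo pointer comparison is insufficient here. A compact sketch over a toy def table (data model hypothetical, assumes acyclic copy chains):

    #include <map>
    #include <utility>

    // A value is keyed by (def slot, register). CopyOf maps a value defined by
    // a full copy to the value it copies; other values are original defs.
    using Key = std::pair<int, unsigned>;

    static std::map<Key, Key> CopyOf;

    // Walk back through full copies to the original definition.
    static Key followCopyChain(Key V) {
      for (auto It = CopyOf.find(V); It != CopyOf.end(); It = CopyOf.find(V))
        V = It->second;
      return V;
    }

    // Two values are identical iff their chains end at the same def and reg.
    static bool valuesIdentical(Key A, Key B) {
      return followCopyChain(A) == followCopyChain(B);
    }

    int main() {
      CopyOf[{20, 2}] = {10, 1};   // %2@20 = COPY %1@10
      CopyOf[{30, 3}] = {20, 2};   // %3@30 = COPY %2@20
      return valuesIdentical({20, 2}, {30, 3}) ? 0 : 1;   // both -> %1@10
    }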
-/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
-/// Return a conflict resolution when possible, but leave the hard cases as
-/// CR_Unresolved.
-/// Recursively calls computeAssignment() on this and Other, guaranteeing that
-/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
-/// The recursion always goes upwards in the dominator tree, making loops
-/// impossible.
JoinVals::ConflictResolution
JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
assert(!V.isAnalyzed() && "Value has already been analyzed!");
- VNInfo *VNI = LI.getValNumInfo(ValNo);
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
if (VNI->isUnused()) {
V.WriteLanes = ~0u;
return CR_Keep;
@@ -1503,46 +1849,56 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ V.ValidLanes = V.WriteLanes = Lanes;
} else {
DefMI = Indexes->getInstructionFromIndex(VNI->def);
- bool Redef = false;
- V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
-
- // If this is a read-modify-write instruction, there may be more valid
- // lanes than the ones written by this instruction.
- // This only covers partial redef operands. DefMI may have normal use
- // operands reading the register. They don't contribute valid lanes.
- //
- // This adds ssub1 to the set of valid lanes in %src:
- //
- // %src:ssub1<def> = FOO
- //
- // This leaves only ssub1 valid, making any other lanes undef:
- //
- // %src:ssub1<def,read-undef> = FOO %src:ssub2
- //
- // The <read-undef> flag on the def operand means that old lane values are
- // not important.
- if (Redef) {
- V.RedefVNI = LI.Query(VNI->def).valueIn();
- assert(V.RedefVNI && "Instruction is reading nonexistent value");
- computeAssignment(V.RedefVNI->id, Other);
- V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
- }
+ assert(DefMI != nullptr);
+ if (SubRangeJoin) {
+ // We don't care about the lanes when joining subregister ranges.
+ V.ValidLanes = V.WriteLanes = 1;
+ } else {
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LR.Query(VNI->def).valueIn();
+ assert((TrackSubRegLiveness || V.RedefVNI) &&
+ "Instruction is reading nonexistent value");
+ if (V.RedefVNI != nullptr) {
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
+ }
- // An IMPLICIT_DEF writes undef values.
- if (DefMI->isImplicitDef()) {
- // We normally expect IMPLICIT_DEF values to be live only until the end
- // of their block. If the value is really live longer and gets pruned in
- // another block, this flag is cleared again.
- V.ErasableImplicitDef = true;
- V.ValidLanes &= ~V.WriteLanes;
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
}
}
// Find the value in Other that overlaps VNI->def, if any.
- LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def);
+ LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
// It is possible that both values are defined by the same instruction, or
// the values are PHIs defined in the same block. When that happens, the two
@@ -1612,8 +1968,14 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Replace;
// Check for simple erasable conflicts.
- if (DefMI->isImplicitDef())
+ if (DefMI->isImplicitDef()) {
+ // We need the def for the subregister if there is nothing else live at the
+ // subrange at this point.
+ if (TrackSubRegLiveness
+ && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0)
+ return CR_Replace;
return CR_Erase;
+ }
// Include the non-conflict where DefMI is a coalescable copy that kills
// OtherVNI. We still want the copy erased and value numbers merged.
@@ -1634,8 +1996,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// %other = COPY %ext
// %this = COPY %ext <-- Erase this copy
//
- if (DefMI->isFullCopy() && !CP.isPartial() &&
- stripCopies(VNI) == stripCopies(V.OtherVNI))
+ if (DefMI->isFullCopy() && !CP.isPartial()
+ && valuesIdentical(VNI, V.OtherVNI, Other))
return CR_Erase;
// If the lanes written by this instruction were all undef in OtherVNI, it is
@@ -1670,7 +2032,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// VNI is clobbering live lanes in OtherVNI, but there is still the
// possibility that no instructions actually read the clobbered lanes.
// If we're clobbering all the lanes in OtherVNI, at least one must be read.
- // Otherwise Other.LI wouldn't be live here.
+ // Otherwise Other.LR wouldn't be live here.
if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
return CR_Impossible;
@@ -1691,9 +2053,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Unresolved;
}
-/// Compute the value assignment for ValNo in LI.
-/// This may be called recursively by analyzeValue(), but never for a ValNo on
-/// the stack.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
if (V.isAnalyzed()) {
@@ -1709,73 +2068,64 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
- DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
- << LI.getValNumInfo(ValNo)->def << " into "
- << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
+ << LR.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
<< V.OtherVNI->def << " --> @"
<< NewVNInfo[Assignments[ValNo]]->def << '\n');
break;
case CR_Replace:
- case CR_Unresolved:
+ case CR_Unresolved: {
// The other value is going to be pruned if this join is successful.
assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
- Other.Vals[V.OtherVNI->id].Pruned = true;
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+ // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
+ // its lanes.
+ if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness)
+ OtherV.ErasableImplicitDef = false;
+ OtherV.Pruned = true;
+ }
// Fall through.
default:
// This value number needs to go in the final joined live range.
Assignments[ValNo] = NewVNInfo.size();
- NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+ NewVNInfo.push_back(LR.getValNumInfo(ValNo));
break;
}
}
bool JoinVals::mapValues(JoinVals &Other) {
- for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
computeAssignment(i, Other);
if (Vals[i].Resolution == CR_Impossible) {
- DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
- << '@' << LI.getValNumInfo(i)->def << '\n');
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
return false;
}
}
return true;
}
-/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
-/// the extent of the tainted lanes in the block.
-///
-/// Multiple values in Other.LI can be affected since partial redefinitions can
-/// preserve previously tainted lanes.
-///
-/// 1 %dst = VLOAD <-- Define all lanes in %dst
-/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
-/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
-/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
-///
-/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
-/// entry to TaintedVals.
-///
-/// Returns false if the tainted lanes extend beyond the basic block.
bool JoinVals::
taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
- VNInfo *VNI = LI.getValNumInfo(ValNo);
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
- // Scan Other.LI from VNI.def to MBBEnd.
- LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
- assert(OtherI != Other.LI.end() && "No conflict?");
+ // Scan Other.LR from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
+ assert(OtherI != Other.LR.end() && "No conflict?");
do {
// OtherI is pointing to a tainted value. Abort the join if the tainted
// lanes escape the block.
SlotIndex End = OtherI->end;
if (End >= MBBEnd) {
- DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
<< OtherI->valno->id << '@' << OtherI->start << '\n');
return false;
}
- DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
<< OtherI->valno->id << '@' << OtherI->start
<< " to " << End << '\n');
// A dead def is not a problem.
@@ -1784,7 +2134,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
TaintExtent.push_back(std::make_pair(End, TaintedLanes));
// Check for another def in the MBB.
- if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+ if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
break;
// Lanes written by the new def are no longer tainted.
@@ -1796,10 +2146,8 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
return true;
}
-/// Return true if MI uses any of the given Lanes from Reg.
-/// This does not include partial redefinitions of Reg.
-bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
- unsigned Lanes) {
+bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+ unsigned Lanes) const {
if (MI->isDebugValue())
return false;
for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
@@ -1815,16 +2163,19 @@ bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
}
bool JoinVals::resolveConflicts(JoinVals &Other) {
- for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
Val &V = Vals[i];
assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
if (V.Resolution != CR_Unresolved)
continue;
- DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
- << '@' << LI.getValNumInfo(i)->def << '\n');
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
+ if (SubRangeJoin)
+ return false;
+
++NumLaneConflicts;
assert(V.OtherVNI && "Inconsistent conflict resolution.");
- VNInfo *VNI = LI.getValNumInfo(i);
+ VNInfo *VNI = LR.getValNumInfo(i);
const Val &OtherV = Other.Vals[V.OtherVNI->id];
// VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
@@ -1854,7 +2205,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
unsigned TaintNum = 0;
for(;;) {
assert(MI != MBB->end() && "Bad LastMI");
- if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+ if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
return false;
}
@@ -1876,13 +2227,6 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
return true;
}
-// Determine if ValNo is a copy of a value number in LI or Other.LI that will
-// be pruned:
-//
-// %dst = COPY %src
-// %src = COPY %dst <-- This value to be pruned.
-// %dst = COPY %src <-- This value is a copy of a pruned value.
-//
bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
if (V.Pruned || V.PrunedComputed)
@@ -1899,15 +2243,16 @@ bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
}
void JoinVals::pruneValues(JoinVals &Other,
- SmallVectorImpl<SlotIndex> &EndPoints) {
- for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
- SlotIndex Def = LI.getValNumInfo(i)->def;
+ SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LR.getValNumInfo(i)->def;
switch (Vals[i].Resolution) {
case CR_Keep:
break;
case CR_Replace: {
- // This value takes precedence over the value in Other.LI.
- LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // This value takes precedence over the value in Other.LR.
+ LIS->pruneValue(Other.LR, Def, &EndPoints);
// Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
// instructions are only inserted to provide a live-out value for PHI
// predecessors, so the instruction should simply go away once its value
@@ -1916,34 +2261,37 @@ void JoinVals::pruneValues(JoinVals &Other,
bool EraseImpDef = OtherV.ErasableImplicitDef &&
OtherV.Resolution == CR_Keep;
if (!Def.isBlock()) {
- // Remove <def,read-undef> flags. This def is now a partial redef.
- // Also remove <def,dead> flags since the joined live range will
- // continue past this instruction.
- for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
- MO.isValid(); ++MO)
- if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
- MO->setIsUndef(EraseImpDef);
- MO->setIsDead(false);
+ if (changeInstrs) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO) {
+ if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
}
+ }
// This value will reach instructions below, but we need to make sure
// the live range also reaches the instruction at Def.
if (!EraseImpDef)
EndPoints.push_back(Def);
}
- DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
- << ": " << Other.LI << '\n');
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
+ << ": " << Other.LR << '\n');
break;
}
case CR_Erase:
case CR_Merge:
if (isPrunedValue(i, Other)) {
- // This value is ultimately a copy of a pruned value in LI or Other.LI.
+ // This value is ultimately a copy of a pruned value in LR or Other.LR.
// We can no longer trust the value mapping computed by
// computeAssignment(), the value that was originally copied could have
// been replaced.
- LIS->pruneValue(&LI, Def, &EndPoints);
- DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
- << Def << ": " << LI << '\n');
+ LIS->pruneValue(LR, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
+ << Def << ": " << LR << '\n');
}
break;
case CR_Unresolved:
@@ -1953,25 +2301,65 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
+void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
+{
+ // Look for values being erased.
+ bool DidPrune = false;
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Erase)
+ continue;
+
+ // Check subranges at the point where the copy will be removed.
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult Q = S.Query(Def);
+
+ // If a subrange starts at the copy then an undefined value has been
+ // copied and we must remove that subrange value as well.
+ VNInfo *ValueOut = Q.valueOutOrDead();
+ if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+ DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask)
+ << " at " << Def << "\n");
+ LIS->pruneValue(S, Def, nullptr);
+ DidPrune = true;
+ // Mark value number as unused.
+ ValueOut->markUnused();
+ continue;
+ }
+ // If a subrange ends at the copy, then a value was copied but only
+ // partially used later. Shrink the subregister range appropriately.
+ if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
+ DEBUG(dbgs() << "\t\tDead uses at sublane "
+ << format("%04X", S.LaneMask) << " at " << Def << "\n");
+ ShrinkMask |= S.LaneMask;
+ }
+ }
+ }
+ if (DidPrune)
+ LI.removeEmptySubRanges();
+}
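
The branch structure in pruneSubRegValues() is a two-row decision table on the live query at the erased copy's def slot: a value out but none in means an undefined value was copied (prune it); a value in but none out means the copy was the last user (queue the lanes for shrinking). A sketch of just that classification under a simplified query model:

    #include <cstdio>

    // Outcome for one subrange at the def slot of an erased copy.
    enum class SubRangeAction { None, PruneValue, ShrinkLanes };

    // Simplified stand-in for the LiveQueryResult used in pruneSubRegValues();
    // HasValueOut also covers a dead def (valueOutOrDead in the real code).
    struct Query {
      bool HasValueIn;    // a value is live into the slot
      bool HasValueOut;   // a value starts at or survives the slot
    };

    static SubRangeAction classify(Query Q) {
      if (Q.HasValueOut && !Q.HasValueIn)
        return SubRangeAction::PruneValue;   // starts at the copy: undef value
      if (Q.HasValueIn && !Q.HasValueOut)
        return SubRangeAction::ShrinkLanes;  // ends at the copy: last use gone
      return SubRangeAction::None;           // flows through, or lane not live
    }

    int main() {
      std::printf("%d %d %d\n",
                  static_cast<int>(classify({false, true})),
                  static_cast<int>(classify({true, false})),
                  static_cast<int>(classify({true, true})));
    }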
+
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
- for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
- SlotIndex Def = LI.getValNumInfo(i)->def;
+ SlotIndex Def = LR.getValNumInfo(i)->def;
switch (Vals[i].Resolution) {
- case CR_Keep:
+ case CR_Keep: {
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
// longer. The IMPLICIT_DEF instructions are only inserted by
// PHIElimination to guarantee that all PHI predecessors have a value.
if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
- // Remove value number i from LI. Note that this VNInfo is still present
- // in NewVNInfo, so it will appear as an unused value number in the final
- // joined interval.
- LI.getValNumInfo(i)->markUnused();
- LI.removeValNo(LI.getValNumInfo(i));
- DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // Remove value number i from LR.
+ VNInfo *VNI = LR.getValNumInfo(i);
+ LR.removeValNo(VNI);
+ // Note that this VNInfo is reused and still referenced in NewVNInfo;
+ // mark it unused so it appears as an unused value number there.
+ VNI->markUnused();
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n');
// FALL THROUGH.
+ }
case CR_Erase: {
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
@@ -1994,12 +2382,96 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
+void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ unsigned LaneMask,
+ const CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+ JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+
+ // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
+ // Conflicts should already be resolved so the mapping/resolution should
+ // always succeed.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ llvm_unreachable("Can't join subrange although main ranges are compatible");
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ llvm_unreachable("Can't join subrange although main ranges are compatible");
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints, false);
+ RHSVals.pruneValues(LHSVals, EndPoints, false);
+
+ LRange.verify();
+ RRange.verify();
+
+ // Join RRange into LHS.
+ LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
+ NewVNInfo);
+
+ DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
+ if (EndPoints.empty())
+ return;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LRange << '\n');
+ LIS->extendToIndices(LRange, EndPoints);
+}
+
+void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+ const LiveRange &ToMerge,
+ unsigned LaneMask, CoalescerPair &CP) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &R : LI.subranges()) {
+ unsigned RMask = R.LaneMask;
+ // LaneMask of subregisters common to subrange R and ToMerge.
+ unsigned Common = RMask & LaneMask;
+ // There is nothing to do without common subregs.
+ if (Common == 0)
+ continue;
+
+ DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common));
+ // LaneMask of subregisters contained in the R range but not in ToMerge;
+ // they have to be split off into their own subrange.
+ unsigned LRest = RMask & ~LaneMask;
+ LiveInterval::SubRange *CommonRange;
+ if (LRest != 0) {
+ R.LaneMask = LRest;
+ DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest));
+ // Duplicate the subrange for the newly merged common lanes.
+ CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
+ } else {
+ // Reuse the existing range.
+ R.LaneMask = Common;
+ CommonRange = &R;
+ }
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+ LaneMask &= ~RMask;
+ }
+
+ if (LaneMask != 0) {
+ DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
+ LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
+ }
+}
+
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
- JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
- JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+ bool TrackSubRegLiveness = MRI->tracksSubRegLiveness();
+ JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
+ TRI, false, TrackSubRegLiveness);
+ JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
+ TRI, false, TrackSubRegLiveness);
DEBUG(dbgs() << "\t\tRHS = " << RHS
<< "\n\t\tLHS = " << LHS
@@ -2015,14 +2487,55 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
return false;
// All clear, the live ranges can be merged.
+ if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+
+ // Transform lanemasks from the LHS to masks in the coalesced register and
+ // create initial subranges if necessary.
+ unsigned DstIdx = CP.getDstIdx();
+ if (!LHS.hasSubRanges()) {
+ unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(DstIdx);
+ // LHS must support subregs or we wouldn't be in this codepath.
+ assert(Mask != 0);
+ LHS.createSubRangeFrom(Allocator, Mask, LHS);
+ } else if (DstIdx != 0) {
+ // Transform LHS lanemasks to new register class if necessary.
+ for (LiveInterval::SubRange &R : LHS.subranges()) {
+ unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
+ R.LaneMask = Mask;
+ }
+ }
+ DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
+ << ' ' << LHS << '\n');
+
+ // Determine lanemasks of RHS in the coalesced register and merge subranges.
+ unsigned SrcIdx = CP.getSrcIdx();
+ if (!RHS.hasSubRanges()) {
+ unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(SrcIdx);
+ mergeSubRangeInto(LHS, RHS, Mask, CP);
+ } else {
+ // Pair up subranges and merge.
+ for (LiveInterval::SubRange &R : RHS.subranges()) {
+ unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
+ mergeSubRangeInto(LHS, R, Mask, CP);
+ }
+ }
+
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
// The merging algorithm in LiveInterval::join() can't handle conflicting
// value mappings, so we need to remove any live ranges that overlap a
// CR_Replace resolution. Collect a set of end points that can be used to
// restore the live range after joining.
SmallVector<SlotIndex, 8> EndPoints;
- LHSVals.pruneValues(RHSVals, EndPoints);
- RHSVals.pruneValues(LHSVals, EndPoints);
+ LHSVals.pruneValues(RHSVals, EndPoints, true);
+ RHSVals.pruneValues(LHSVals, EndPoints, true);
// Erase COPY and IMPLICIT_DEF instructions. This may cause some external
// registers to require trimming.
@@ -2041,24 +2554,23 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
MRI->clearKillFlags(LHS.reg);
MRI->clearKillFlags(RHS.reg);
- if (EndPoints.empty())
- return true;
+ if (!EndPoints.empty()) {
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices((LiveRange&)LHS, EndPoints);
+ }
- // Recompute the parts of the live range we had to remove because of
- // CR_Replace conflicts.
- DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
- << " points: " << LHS << '\n');
- LIS->extendToIndices(LHS, EndPoints);
return true;
}
-/// Attempt to join these two intervals. On failure, this returns false.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
namespace {
-// Information concerning MBB coalescing priority.
+/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
MachineBasicBlock *MBB;
unsigned Depth;
@@ -2069,10 +2581,10 @@ struct MBBPriorityInfo {
};
}
-// C-style comparator that sorts first based on the loop depth of the basic
-// block (the unsigned), and then on the MBB number.
-//
-// EnableGlobalCopies assumes that the primary sort key is loop depth.
+/// C-style comparator that sorts first based on the loop depth of the basic
+/// block (the unsigned), and then on the MBB number.
+///
+/// EnableGlobalCopies assumes that the primary sort key is loop depth.
static int compareMBBPriority(const MBBPriorityInfo *LHS,
const MBBPriorityInfo *RHS) {
// Deeper loops first
@@ -2112,8 +2624,6 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
|| LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
-// Try joining WorkList copies starting from index From.
-// Null out any successful joins.
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
@@ -2224,15 +2734,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &fn.getRegInfo();
TM = &fn.getTarget();
- TRI = TM->getSubtargetImpl()->getRegisterInfo();
- TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetSubtargetInfo &STI = fn.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
-
- const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
- JoinGlobalCopies = ST.useMachineScheduler();
+ JoinGlobalCopies = STI.useMachineScheduler();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
@@ -2264,9 +2773,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = InflateRegs[i];
if (MRI->reg_nodbg_empty(Reg))
continue;
- if (MRI->recomputeRegClass(Reg, *TM)) {
+ if (MRI->recomputeRegClass(Reg)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ LiveInterval &LI = LIS->getInterval(Reg);
+ unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ if (MaxMask == 0) {
+        // If the inflated register class no longer supports subregisters,
+        // remove the subranges.
+ LI.clearSubRanges();
+ } else {
+#ifndef NDEBUG
+ // If subranges are still supported, then the same subregs should still
+ // be supported.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+          assert((S.LaneMask & ~MaxMask) == 0);
+ }
+#endif
+ }
++NumInflated;
}
}
@@ -2277,7 +2801,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// Implement the dump method.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
LIS->print(O, m);
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6f8b337..76a7fef 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -36,8 +36,8 @@ static cl::opt<bool> StressSchedOpt(
void SchedulingPriorityQueue::anchor() { }
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
- : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()),
- TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf),
+ : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
+ TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
#ifndef NDEBUG
StressSched = StressSchedOpt;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index dece643..78bfd23 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -44,25 +44,24 @@ using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable use of AA during MI GAD construction"));
+ cl::desc("Enable use of AA during MI DAG construction"));
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
- cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction"));
+ cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- bool IsPostRAFlag,
- bool RemoveKillFlags,
+ bool IsPostRAFlag, bool RemoveKillFlags,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
- IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
- CanHandleTerminators(false), FirstDbgValue(nullptr) {
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
+ CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
@@ -253,7 +252,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
assert(MO.isDef() && "expect physreg def");
// Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
@@ -444,7 +443,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
int DefOp = Def->findRegisterDefOperandIdx(Reg);
dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
@@ -614,10 +613,10 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
}
// Track current depth.
(*Depth)++;
- // Iterate over chain dependencies only.
+ // Iterate over memory dependencies only.
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
I != E; ++I)
- if (I->isCtrl())
+ if (I->isNormalMemoryOrBarrier())
iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
return *Depth;
}
@@ -644,11 +643,12 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
(*I)->addPred(Dep);
}
- // Now go through all the chain successors and iterate from them.
- // Keep track of visited nodes.
+
+ // Iterate recursively over all previously added memory chain
+ // successors. Keep track of visited nodes.
for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
JE = (*I)->Succs.end(); J != JE; ++J)
- if (J->isCtrl())
+ if (J->isNormalMemoryOrBarrier())
iterateChainSucc (AA, MFI, SU, J->getSUnit(),
ExitSU, &Depth, Visited);
}
@@ -742,7 +742,7 @@ void ScheduleDAGInstrs::initSUnits() {
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs) {
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
@@ -891,7 +891,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// fall-through
new_alias_chain:
- // Chain all possibly aliasing memory references though SU.
+ // Chain all possibly aliasing memory references through SU.
if (AliasChain) {
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
@@ -991,11 +991,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependence on alias chain, if needed.
if (AliasChain)
addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
- // But we also should check dependent instructions for the
- // SU in question.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
}
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
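
Both hunks above narrow the successor walk from following any control edge (isCtrl) to following only normal-memory or barrier edges. The underlying pattern is a depth-capped DFS over a filtered edge set; here is a toy model with invented node types rather than SUnits:

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    struct Node {
      int Id;
      std::vector<std::pair<Node *, bool>> Succs; // (successor, is memory edge)
    };

    // Depth-capped DFS that only follows edges satisfying the predicate,
    // mirroring the switch from isCtrl() to isNormalMemoryOrBarrier().
    static unsigned walk(Node *N, unsigned Depth, unsigned Cap,
                         std::set<int> &Visited) {
      if (Depth > Cap || !Visited.insert(N->Id).second)
        return Depth;
      for (auto &S : N->Succs)
        if (S.second) // memory edges only; control-only edges are skipped
          Depth = walk(S.first, Depth + 1, Cap, Visited);
      return Depth;
    }

    int main() {
      Node C{2, {}};
      Node B{1, {{&C, true}}};
      Node A{0, {{&B, true}, {&C, false}}}; // second edge is control-only
      std::set<int> Visited;
      std::printf("memory-chain depth reached: %u\n", walk(&A, 0, 16, Visited));
    }
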
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
index 0e52ee3..9501ad9 100644
--- a/lib/CodeGen/SelectionDAG/Android.mk
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -22,6 +22,7 @@ codegen_selectiondag_SRC_FILES := \
SelectionDAGDumper.cpp \
SelectionDAGISel.cpp \
SelectionDAGPrinter.cpp \
+ StatepointLowering.cpp \
TargetLowering.cpp \
TargetSelectionDAGInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 75e8167..fbedf2c 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMSelectionDAG
SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ StatepointLowering.cpp
ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a1291ed..6129401 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -303,6 +303,8 @@ namespace {
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -325,6 +327,7 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -361,6 +364,28 @@ namespace {
    /// chain (aliasing node).
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+      // Sequence number of this mem node; the lowest mem operand in the
+      // DAG starts at zero.
+ unsigned SequenceNum;
+ };
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumElem,
+ bool IsConstantSrc, bool UseVector);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
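
A minimal model of how a MergeConsecutiveStores-style pass would use the MemOpLink record above (simplified types, invented node ids): candidates are collected with their byte offset from a shared base pointer plus their DAG order, then sorted by offset to expose consecutive runs:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct MemOpLinkModel {
      int NodeId;             // stands in for the LSBaseSDNode pointer
      int64_t OffsetFromBase; // byte offset from the common base pointer
      unsigned SequenceNum;   // DAG order; lowest mem operand is zero
    };

    int main() {
      std::vector<MemOpLinkModel> Stores = {{2, 8, 1}, {1, 0, 0}, {3, 4, 2}};
      // Sort by offset so consecutive stores become adjacent entries.
      std::sort(Stores.begin(), Stores.end(),
                [](const MemOpLinkModel &A, const MemOpLinkModel &B) {
                  return A.OffsetFromBase < B.OffsetFromBase;
                });
      for (const auto &S : Stores)
        std::printf("node %d at offset %lld (seq %u)\n", S.NodeId,
                    (long long)S.OffsetFromBase, S.SequenceNum);
    }
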
@@ -378,12 +403,9 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ auto *F = DAG.getMachineFunction().getFunction();
+ ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the work list
@@ -444,7 +466,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
@@ -736,10 +758,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
}
if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
@@ -757,10 +778,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return SDValue();
}
if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
@@ -785,11 +805,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
N->dump(&DAG);
dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
- dbgs() << " and " << NumTo-1 << " other values\n";
- for (unsigned i = 0, e = NumTo; i != e; ++i)
- assert((!To[i].getNode() ||
- N->getValueType(i) == To[i].getValueType()) &&
- "Cannot combine value to value of different type!"));
+ dbgs() << " and " << NumTo-1 << " other values\n");
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!");
+
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
@@ -874,8 +895,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
@@ -1096,8 +1117,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
@@ -1160,10 +1181,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Early exit if this basic block is in an optnone function.
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::OptimizeNone))
return;
// Add all the dag nodes to the worklist.
@@ -1351,6 +1370,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSTORE: return visitMSTORE(N);
}
return SDValue();
}
@@ -1475,7 +1496,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
- // rededundant.
+ // redundant.
Changed = true;
break;
@@ -1504,7 +1525,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
SDValue Result;
- // If we've change things around then replace token factor.
+ // If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
@@ -1514,8 +1535,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
- // Don't add users to work list.
- return CombineTo(N, Result, false);
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
@@ -1541,8 +1565,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
@@ -1563,6 +1585,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -1714,8 +1738,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
@@ -1725,6 +1747,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
@@ -1756,10 +1780,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
@@ -1786,10 +1810,6 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
@@ -1807,6 +1827,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
@@ -1826,6 +1848,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
@@ -1890,8 +1914,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
@@ -1907,6 +1929,8 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
@@ -2055,8 +2079,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2066,6 +2088,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
@@ -2145,8 +2169,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2156,6 +2178,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
@@ -2197,11 +2221,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
@@ -2239,11 +2263,11 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
@@ -2522,6 +2546,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// do not sink logical op inside of a vector extend, since it may combine
@@ -2529,6 +2554,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
EVT Op0VT = N0.getOperand(0).getValueType();
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::BSWAP ||
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
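
The new BSWAP case in SimplifyBinOpWithSameOpcodeHands is sound because a byte swap is a pure byte permutation, so it commutes with any bitwise logic op. A quick self-contained check (relies on the Clang/GCC __builtin_bswap32 intrinsic):

    #include <cstdint>
    #include <cstdio>

    static uint32_t bswap32(uint32_t V) { return __builtin_bswap32(V); }

    int main() {
      uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
      std::printf("%d\n", (bswap32(X) & bswap32(Y)) == bswap32(X & Y)); // 1
      std::printf("%d\n", (bswap32(X) | bswap32(Y)) == bswap32(X | Y)); // 1
      std::printf("%d\n", (bswap32(X) ^ bswap32(Y)) == bswap32(X ^ Y)); // 1
    }
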
@@ -2662,11 +2688,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
@@ -2698,6 +2720,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -2707,6 +2731,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && N1C->isAllOnesValue())
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
@@ -2793,6 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
Load->getMemoryVT());
// Resize the constant to the same size as the original memory access before
@@ -2838,6 +2864,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
@@ -2919,7 +2946,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -2939,7 +2966,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -2965,10 +2992,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LN0->getMemoryVT();
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
+ ExtVT))) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
@@ -2983,7 +3011,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
+ ExtVT))) {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
@@ -3003,7 +3032,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(NewPtr.getNode());
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), NewPtr,
@@ -3313,9 +3341,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
@@ -3407,6 +3432,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
// fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3440,15 +3467,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
- if (!COR.getNode())
- return SDValue();
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1), COR);
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ return DAG.getNode(
+ ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
+ return SDValue();
}
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
@@ -3521,6 +3548,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
+ }
+
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
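
The (or (and X, M), (and X, N)) fold added above is plain distributivity over the shared operand; a one-line sanity check:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t X = 0xDEADBEEF, M = 0x00FF00FF, N = 0x0F0F0F0F;
      // (X & M) | (X & N) == X & (M | N) for any bit patterns.
      std::printf("%d\n", ((X & M) | (X & N)) == (X & (M | N))); // prints 1
    }
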
@@ -3790,9 +3828,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
@@ -3816,6 +3851,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3830,6 +3867,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -4039,12 +4077,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4061,8 +4098,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
- if (C.getNode())
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
@@ -4072,6 +4108,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl c1, c2) -> c1<<c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
// fold (shl 0, x) -> 0
@@ -4220,12 +4257,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4234,6 +4270,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra c1, c2) -> (sra c1, c2)
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
// fold (sra 0, x) -> 0
@@ -4366,12 +4403,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4380,6 +4416,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl c1, c2) -> c1 >>u c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
// fold (srl 0, x) -> 0
@@ -4608,13 +4645,47 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
+
+/// \brief Generate Min/Max node
+static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+ SDValue True, SDValue False,
+ ISD::CondCode CC, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
+ if (TLI.isOperationLegal(Opcode, VT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: {
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
+ if (TLI.isOperationLegal(Opcode, VT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
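
A standalone model of the predicate-to-opcode mapping in combineMinNumMaxNum (invented enum names, not ISD opcodes): for a less-than style compare, selecting the compared LHS is a min, and swapping the select arms flips it to a max:

    #include <cstdio>

    enum class Pred { LT, GT };
    enum class Op { FMinNum, FMaxNum, None };

    // Mirrors the switch above: less-than predicates pick min when the
    // select's true value is the compared LHS, max when the arms swap.
    static Op pickMinMax(Pred CC, bool LHSIsTrueValue) {
      switch (CC) {
      case Pred::LT: return LHSIsTrueValue ? Op::FMinNum : Op::FMaxNum;
      case Pred::GT: return LHSIsTrueValue ? Op::FMaxNum : Op::FMinNum;
      }
      return Op::None;
    }

    int main() {
      // select (x < y), x, y  ->  fminnum(x, y)
      std::printf("%d\n", pickMinMax(Pred::LT, true) == Op::FMinNum);
      // select (x < y), y, x  ->  fmaxnum(x, y)
      std::printf("%d\n", pickMinMax(Pred::LT, false) == Op::FMaxNum);
    }
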
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
@@ -4622,12 +4693,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N1 == N2)
return N1;
// fold (select true, X, Y) -> X
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && !N0C->isNullValue())
return N1;
// fold (select false, X, Y) -> Y
if (N0C && N0C->isNullValue())
return N2;
// fold (select C, 1, X) -> (or C, X)
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
@@ -4639,6 +4712,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (VT.isInteger() &&
(VT0 == MVT::i1 || (VT0.isInteger() &&
TLI.getBooleanContents(false, false) ==
@@ -4687,6 +4761,28 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
+ // select x, y (fcmp lt x, y) -> fminnum x, y
+ // select x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ SDValue FMinMax =
+ combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
+ N1, N2, CC, TLI, DAG);
+ if (FMinMax)
+ return FMinMax;
+ }
+
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
TLI.isOperationLegal(ISD::SELECT_CC, VT))
@@ -4771,6 +4867,166 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ SDValue Data = MST->getValue();
+ SDLoc DL(N);
+
+ // If the MSTORE data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and its operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (Mask.getOpcode() == ISD::SETCC) {
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
+
+ SDValue Chain = MST->getChain();
+ SDValue Ptr = MST->getBasePtr();
+
+ EVT MemoryVT = MST->getMemoryVT();
+ unsigned Alignment = MST->getOriginalAlignment();
+
+      // If the original alignment covers the whole vector, only half of it
+      // can be guaranteed for the second half.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MST->getAAInfo(), MST->getRanges());
+
+ Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ MST->isTruncatingStore());
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MST->getAAInfo(),
+ MST->getRanges());
+
+ Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ MST->isTruncatingStore());
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+ return SDValue();
+}
+
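
The address and alignment arithmetic in the split above is small enough to model directly. A sketch assuming a hypothetical 256-bit masked store with 32-byte alignment:

    #include <cstdio>

    int main() {
      unsigned VecBits = 256, Align = 32;  // e.g. a v8i32 store
      unsigned LoBits = VecBits / 2;
      unsigned IncrementSize = LoBits / 8; // byte offset of the Hi half
      // Only half the alignment survives when it covered the full vector.
      unsigned SecondHalfAlign =
          (Align == VecBits / 8) ? Align / 2 : Align;
      std::printf("Hi half at base+%u, align %u\n", IncrementSize,
                  SecondHalfAlign);
    }
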
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ // If the MLOAD result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and its operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MLD->getSrc0();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Chain = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ EVT MemoryVT = MLD->getMemoryVT();
+ unsigned Alignment = MLD->getOriginalAlignment();
+
+      // If the original alignment covers the whole vector, only half of it
+      // can be guaranteed for the second half.
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ISD::NON_EXTLOAD);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ISD::NON_EXTLOAD);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
+
+ SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { LoadRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4880,13 +5136,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
- }
-
- // Fold to a simpler select_cc
- if (SCC.getOpcode() == ISD::SETCC)
+ } else if (SCC->getOpcode() == ISD::UNDEF) {
+ // When the condition is UNDEF, just return the first operand. This is
+      // consistent with DAG creation: no setcc node is created in this case.
+ return N2;
+ } else if (SCC.getOpcode() == ISD::SETCC) {
+ // Fold to a simpler select_cc
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
SCC.getOperand(0), SCC.getOperand(1), N2, N3,
SCC.getOperand(2));
+ }
}
// If we can fold this based on the true/false value, do so.
@@ -5047,6 +5306,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
}
}
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
+ !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
+ LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
+ Align, LN0->getAAInfo());
+
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, BasePtr.getValueType()));
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated sextloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
+ (ISD::NodeType)N->getOpcode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
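
A sketch of the splitting loop's arithmetic, assuming a v8i16 -> v8i32 extend on a target whose widest legal piece is v4i32; minAlign here is a simplified MinAlign that assumes power-of-two inputs:

    #include <algorithm>
    #include <cstdio>

    // Simplified MinAlign: largest power of two dividing both, assuming
    // power-of-two inputs (LLVM's MinAlign handles the general case).
    static unsigned minAlign(unsigned A, unsigned Offset) {
      return Offset == 0 ? A : std::min(A, Offset & (~Offset + 1u));
    }

    int main() {
      unsigned DstElems = 8, SplitDstElems = 4; // v8i32 -> 2 x v4i32
      unsigned Stride = 8;                      // v4i16 piece store size, bytes
      unsigned NumSplits = DstElems / SplitDstElems;
      unsigned BaseAlign = 16;
      for (unsigned Idx = 0; Idx < NumSplits; ++Idx) {
        unsigned Offset = Idx * Stride;
        std::printf("piece %u: base+%u, align %u\n", Idx, Offset,
                    minAlign(BaseAlign, Offset));
      }
    }
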
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5113,17 +5468,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
- // None of the supported targets knows how to perform load and sign extend
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -5140,6 +5496,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -5147,7 +5508,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
@@ -5167,7 +5528,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
@@ -5403,17 +5764,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- // None of the supported targets knows how to perform load and vector_zext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
@@ -5431,13 +5793,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
@@ -5474,7 +5841,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
@@ -5636,7 +6003,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5666,7 +6033,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
- if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -5795,7 +6162,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
- if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -5874,6 +6241,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
return SDValue();
+ if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
+ return SDValue();
+
EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
@@ -5999,7 +6369,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -6015,7 +6385,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -6318,19 +6688,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
- if (Res.getNode() != N) {
- if (!LegalOperations ||
- TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
- return Res;
-
- // Folding it resulted in an illegal node, and it's too late to
- // do that. Clean up the old node and forego the transformation.
- // Ideally this won't happen very often, because instcombine
- // and the earlier dagcombine runs (where illegal nodes are
- // permitted) should have folded most of them already.
- deleteAndRecombine(Res.getNode());
- }
+ // If we can't allow illegal operations, we need to check that this is just
+    // an fp -> int or int -> fp conversion and that the resulting operation will
+ // be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT)))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
}
// (conv (conv x, t1), t2) -> (conv x, t2)
@@ -6489,7 +6855,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (SrcEltVT.isFloatingPoint()) {
// Convert the input float vector to an int vector where the elements are the
// same sizes.
- assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
@@ -6498,7 +6863,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Now we know the input is an integer vector. If the output is an FP type,
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
- assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
@@ -6549,8 +6913,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
- for (unsigned j = 0; j != NumOutputsPerInput; ++j)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
@@ -6575,6 +6938,133 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
+// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
+static SDValue performFaddFmulCombines(unsigned FusedOpcode,
+ bool Aggressive,
+ SDNode *N,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+ }
+
+ // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
+ if (N1->getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
+ }
+
+ return SDValue();
+}
+
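In scalar terms, the fused node these folds produce computes x*y + z with a single rounding step. A minimal C++ sketch of the arithmetic being matched (std::fma models the fused node here; this is an illustration of the identity, not of the DAG API):

#include <cmath>

// (fadd (fmul x, y), z) -> (fma x, y, z): the product is not rounded
// separately before the add.
double fusedAdd(double x, double y, double z) {
  return std::fma(x, y, z);   // one rounding
}
double unfusedAdd(double x, double y, double z) {
  return x * y + z;           // rounds the product, then rounds the sum
}
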
+static SDValue performFsubFmulCombines(unsigned FusedOpcode,
+ bool Aggressive,
+ SDNode *N,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ SDLoc SL(N);
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse()))
+ return DAG.getNode(FusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(FusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y, (fma u, v, (fneg z)))
+ if (N0.getOpcode() == FusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == FusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N20),
+ N21, N0));
+ }
+ }
+
+ return SDValue();
+}
+
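The fsub variants map onto the same fused node by negating an operand. A rough C++ sketch of the first two identities (again plain arithmetic, not the DAG calls):

#include <cmath>

// (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
double mulSub(double x, double y, double z) { return std::fma(x, y, -z); }

// (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
double subMul(double x, double y, double z) { return std::fma(-y, z, x); }
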
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6714,23 +7204,55 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
} // enable-unsafe-fp-math
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume that if there is an fmad instruction, it should be used
+ // aggressively.
+ if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+ if (SDValue Fused
+ = performFaddFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(1)), N0);
+ }
+ }
}
return SDValue();
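When fpext is free, widening the multiplicands first exposes a wide FMA. Note the unfused form rounds the narrow product, so this is only value-safe under the fast/unsafe FP modes guarding this block; a hedged C++ illustration:

#include <cmath>

// (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
double extendedFma(float x, float y, double z) {
  return std::fma((double)x, (double)y, z);  // no intermediate float rounding
}
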
@@ -6792,37 +7314,95 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume that if there is an fmad instruction, it should be used
+ // aggressively.
+ if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+
+ if (SDValue Fused
+ = performFsubFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
+
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N000.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N000.getOperand(1)),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N000.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N000.getOperand(1)),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+ }
}
}
@@ -7104,6 +7684,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
}
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal.
+ // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+ // Notice that this is not always beneficial. One reason is that different
+ // targets may have different costs for FDIV and FMUL, so sometimes the cost
+ // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+ // reason is that the critical path grows from "one FDIV" to "one FDIV + one
+ // FMUL".
+ if (Options.UnsafeFPMath) {
+ // Skip if current node is a reciprocal.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> Users;
+ // Find all FDIV users of the same divisor.
+ for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
+ UE = N1.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = UI.getUse().getUser();
+ if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
+ Users.push_back(User);
+ }
+
+ if (TLI.combineRepeatedFPDivisors(Users.size())) {
+ SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
+ if ((*I)->getOperand(0) != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
+ (*I)->getOperand(0), Reciprocal);
+ DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+ }
+ }
+ return SDValue();
+ }
+ }
+
return SDValue();
}
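A rough sketch of what the repeated-divisor rewrite does at the source level (whether it pays off is the target's call, via combineRepeatedFPDivisors):

// Before: two FDIVs by the same divisor d.
//   *out0 = a / d;  *out1 = b / d;
// After, under unsafe-fp-math: one FDIV plus two FMULs.
void divideBoth(float a, float b, float d, float *out0, float *out1) {
  float recip = 1.0f / d;  // the single division producing the reciprocal
  *out0 = a * recip;       // each original division becomes a multiply
  *out1 = b * recip;
}
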
@@ -7122,7 +7740,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ !TLI.isFsqrtCheap()) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
@@ -7198,11 +7817,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
@@ -7251,11 +7870,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
@@ -7289,6 +7908,50 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return SDValue();
}
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ if (SrcVT == VT)
+ return Src;
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ }
+ return SDValue();
+}
+
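The precision check is what makes the round-trip an identity: every value in the narrower of the two integer ranges must be exactly representable in the float type. For IEEE f32 (24 significand bits) an i16 round-trip folds away, while an i32 one must not; a self-contained check:

#include <cassert>
#include <cstdint>

int main() {
  int16_t a = 12345;
  assert((int16_t)(float)a == a);   // every i16 fits in f32's 24-bit significand

  uint32_t b = (1u << 24) + 1;      // 16777217 is not representable in f32
  assert((uint32_t)(float)b != b);  // rounds to 16777216, so no identity fold
  return 0;
}
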
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7298,7 +7961,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
if (N0CFP)
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
@@ -7310,7 +7973,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
@@ -7329,11 +7992,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
- // This is a value preserving truncation if both round's are.
- bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
- N0.getNode()->getConstantOperandVal(1) == 1;
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(IsTrunc));
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ // If the first fp_round isn't a value-preserving truncation, it might
+ // introduce a tie in the second fp_round that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding once.
+ // Also, this is a value-preserving truncation iff both fp_rounds are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc));
}
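A decimal analogy of the tie the comment warns about (hedging: this uses round-half-to-even, as IEEE does): 0.451 rounded directly to one decimal gives 0.5, but rounding to two decimals first gives 0.45, which is now an exact tie and rounds to 0.4. Double rounding changed the answer, which is why the fold demands the inner fp_round be value-preserving unless unsafe-fp-math permits it.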
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
@@ -7391,7 +8059,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -8923,7 +9591,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
@@ -9070,9 +9738,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
- !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
- TLI.isNarrowingProfitable(VT, NewVT))) {
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
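The loop searches candidate widths upward in powers of two until one is legal, profitable, and exactly matches its store size. A hedged sketch of the width progression (nextPowerOf2 re-derived here for illustration; LLVM's NextPowerOf2 behaves the same way):

#include <cstdint>

// Smallest power of two strictly greater than V.
uint64_t nextPowerOf2(uint64_t V) {
  V |= V >> 1;  V |= V >> 2;  V |= V >> 4;
  V |= V >> 8;  V |= V >> 16; V |= V >> 32;
  return V + 1;
}
// E.g. with MSB - ShAmt == 7, the candidate widths are 8, then 16, 32, ...
// until one passes the legality and profitability checks or reaches BitWidth.
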
@@ -9272,36 +9943,139 @@ struct BaseIndexOffset {
}
};
-/// Holds a pointer to an LSBaseSDNode as well as information on where it
-/// is located in a sequence of memory operations connected by a chain.
-struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
- // Ptr to the mem node.
- LSBaseSDNode *MemNode;
- // Offset from the base ptr.
- int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
-};
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned EarliestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ if (IsConstantSrc) {
+ // A vector store with a constant source implies that the constant is
+ // zero; we only handle merging stores of constant zeros because the zero
+ // can be materialized without a load.
+ // It may be beneficial to loosen this restriction to allow non-zero
+ // store merging.
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+
+ return true;
+}
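On the integer path the constants are packed into one wide value whose memory image matches the original store sequence; a small self-contained sketch of the little-endian case:

#include <cstdint>

// Four consecutive i8 stores of {0x11, 0x22, 0x33, 0x44} merge into a single
// i32 store; on a little-endian target element 0 must land in the low byte.
uint32_t packLittleEndian() {
  const uint8_t Elts[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t StoreInt = 0;
  for (int i = 3; i >= 0; --i) { // highest-addressed element first, as above
    StoreInt <<= 8;
    StoreInt |= Elts[i];
  }
  return StoreInt;               // 0x44332211
}
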
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@@ -9443,7 +10217,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@@ -9492,85 +10266,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
- // Make sure we have something to merge.
- if (NumElem < 2)
- return false;
-
- unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
- }
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
- SDLoc DL(StoreNodes[0].MemNode);
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+ // Bail out if any stored values are not elements extracted from a vector.
+ // (This restriction could be loosened.)
+ // It should be possible to handle mixed sources, but load sources need
+ // more careful handling (see the block of code below that handles
+ // consecutive loads).
+ if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
- SDValue StoredVal;
- if (UseVector) {
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
- StoredVal = DAG.getConstant(0, Ty);
- } else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
-
- // Construct a single integer constant which is made of the smaller
- // constant inputs.
- bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- SDValue Val = St->getValue();
- StoreInt<<=ElementSizeBytes*8;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt|=C->getAPIntValue().zext(StoreBW);
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
- } else {
- assert(false && "Invalid constant element type");
- }
- }
-
- // Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
- }
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- false, false,
- FirstInChain->getAlignment());
-
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ NumElem = i + 1;
}
- return true;
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ false, true);
}
// Below we handle the case of multiple consecutive stores that
@@ -9668,9 +10390,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
LastLegalIntegerType = i+1;
}
}
@@ -10108,7 +10830,8 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
+ VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
Load = DAG.getExtLoad(
@@ -10474,6 +11197,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
@@ -10519,26 +11247,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
SDValue VecIn1, VecIn2;
+ bool UsesZeroVector = false;
for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue Op = N->getOperand(i);
// Ignore undef inputs.
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+
+ // See if we can combine this build_vector into a blend with a zero vector.
+ if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
+ (Op.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ UsesZeroVector = true;
+ continue;
+ }
// If this input is something other than a EXTRACT_VECTOR_ELT with a
// constant index, bail out.
- if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1))) {
VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
// We allow up to two distinct input vectors.
- SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ SDValue ExtractedFromVec = Op.getOperand(0);
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
if (!VecIn1.getNode()) {
VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode()) {
+ } else if (!VecIn2.getNode() && !UsesZeroVector) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
@@ -10549,55 +11288,93 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
+ unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ unsigned Opcode = N->getOperand(i).getOpcode();
+ if (Opcode == ISD::UNDEF) {
Mask.push_back(-1);
continue;
}
+ // Operands can also be zero.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
+ assert(UsesZeroVector &&
+ (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
+ "Unexpected node found!");
+ Mask.push_back(NumInScalars+i);
+ continue;
+ }
+
// If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1);
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (Extract.getOperand(0) == VecIn1) {
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- if (ExtIndex > VT.getVectorNumElements())
- return SDValue();
-
Mask.push_back(ExtIndex);
continue;
}
- // Otherwise, use InIdx + VecSize
- unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- Mask.push_back(Idx+NumInScalars);
+ // Otherwise, use InIdx + InputVecSize
+ Mask.push_back(InNumElements + ExtIndex);
}
+ // Avoid introducing illegal shuffles with zero.
+ if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ return SDValue();
+
// We can't generate a shuffle node with mismatched input and output types.
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
- // We don't support shuffeling between TWO values of different types.
- if (VecIn2.getNode())
+ // If the input vector type has a different base type to the output
+ // vector type, bail out.
+ EVT VTElemType = VT.getVectorElementType();
+ if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+ (VecIn2.getNode() &&
+ (VecIn2.getValueType().getVectorElementType() != VTElemType)))
return SDValue();
+ // If the input vector is too small, widen it.
// We only support widening of vectors which are half the size of the
// output registers. For example XMM->YMM widening on X86 with AVX.
- if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
- return SDValue();
+ EVT VecInT = VecIn1.getValueType();
+ if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
+ // If we only have one small input, widen it by adding undef values.
+ if (!VecIn2.getNode())
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+ DAG.getUNDEF(VecIn1.getValueType()));
+ else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+ // If we have two small inputs of the same type, try to concat them.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+ VecIn2 = SDValue(nullptr, 0);
+ } else
+ return SDValue();
+ } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
+ // If the input vector is too large, try to split it.
+ // We don't support having two input vectors that are too large.
+ if (VecIn2.getNode())
+ return SDValue();
- // If the input vector type has a different base type to the output
- // vector type, bail out.
- if (VecIn1.getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
+ if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
+ return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // Try to replace VecIn1 with two extract_subvectors.
+ // No need to update the mask; it should still be correct.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ UsesZeroVector = false;
+ } else
+ return SDValue();
}
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ if (UsesZeroVector)
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
+ DAG.getConstantFP(0.0, VT);
+ else
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
// Check that we were able to transform all incoming values to the same
// type.
@@ -10656,36 +11433,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF-only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If the BUILD_VECTORs are built from integers, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
}
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
@@ -10881,7 +11678,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
-// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into a simpler shuffle then a concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -10895,6 +11693,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B), UNDEF) when the mask leaves the high half of the
+ // result entirely undef.
+ if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+ std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+ SVN->getMask().end(), [](int i) { return i == -1; })) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+ ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
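Concretely, the guard only fires when no mask entry selects the high half of the result; a sketch of that predicate plus an example:

#include <algorithm>
#include <vector>

// Fold only if every high-half mask entry is undef (-1).
bool highHalfUndef(const std::vector<int> &Mask, unsigned NumElemsPerConcat) {
  return std::all_of(Mask.begin() + NumElemsPerConcat, Mask.end(),
                     [](int i) { return i == -1; });
}
// E.g. Mask = <2,7,1,4,-1,-1,-1,-1> with NumElemsPerConcat == 4 qualifies, so
// shuffle(concat(A,B), undef, Mask) becomes concat(shuffle(A,B,<2,7,1,4>), undef).
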
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
@@ -10993,7 +11803,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If it is a splat, check if the argument vector is another splat or a
- // build_vector with all scalar elements the same.
+ // build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -11030,6 +11840,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
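The canonicalization simply repeats the splatted scalar; a minimal sketch of the rebuild (operating on plain vectors rather than SDValues):

#include <vector>

// shuffle(build_vector(a,b,c,d), undef, <1,1,1,1>) -> build_vector(b,b,b,b)
template <typename T>
std::vector<T> canonicalizeSplat(const std::vector<T> &Ops, unsigned SplatIdx) {
  return std::vector<T>(Ops.size(), Ops[SplatIdx]); // repeat the splatted element
}
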
@@ -11050,121 +11872,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
- // If this shuffle node is simply a swizzle of another shuffle node,
- // then try to simplify it.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N1.getOpcode() == ISD::UNDEF) {
-
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SmallVector<int, 4> Mask;
- // Compute the combined shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- assert(Idx < (int)NumElts && "Index references undef operand");
- // Next, this index comes from the first value, which is the incoming
- // shuffle. Adopt the incoming index.
- if (Idx >= 0)
- Idx = OtherSV->getMaskElt(Idx);
- Mask.push_back(Idx);
- }
-
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- bool isUndefMask = true;
- for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
- isUndefMask &= Mask[i] < 0;
-
- if (isUndefMask)
- return DAG.getUNDEF(VT);
-
- bool CommuteOperands = false;
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
- // To be valid, the combine shuffle mask should only reference elements
- // from one of the two vectors in input to the inner shufflevector.
- bool IsValidMask = true;
- for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
- // See if the combined mask only reference undefs or elements coming
- // from the first shufflevector operand.
- IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
-
- if (!IsValidMask) {
- IsValidMask = true;
- for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
- // Check that all the elements come from the second shuffle operand.
- IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
- CommuteOperands = IsValidMask;
- }
-
- // Early exit if the combined shuffle mask is not valid.
- if (!IsValidMask)
- return SDValue();
- }
-
- // See if this pair of shuffles can be safely folded according to either
- // of the following rules:
- // shuffle(shuffle(x, y), undef) -> x
- // shuffle(shuffle(x, undef), undef) -> x
- // shuffle(shuffle(x, y), undef) -> y
- bool IsIdentityMask = true;
- unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
- for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
- // Skip Undefs.
- if (Mask[i] < 0)
- continue;
-
- // The combined shuffle must map each index to itself.
- IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
- }
-
- if (IsIdentityMask) {
- if (CommuteOperands)
- // optimize shuffle(shuffle(x, y), undef) -> y.
- return OtherSV->getOperand(1);
-
- // optimize shuffle(shuffle(x, undef), undef) -> x
- // optimize shuffle(shuffle(x, y), undef) -> x
- return OtherSV->getOperand(0);
- }
-
- // It may still be beneficial to combine the two shuffles if the
- // resulting shuffle is legal.
- if (TLI.isTypeLegal(VT)) {
- if (!CommuteOperands) {
- if (TLI.isShuffleMaskLegal(Mask, VT))
- // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
- &Mask[0]);
- } else {
- // Compute the commuted shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
-
- if (TLI.isShuffleMaskLegal(Mask, VT))
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
- &Mask[0]);
- }
- }
- }
-
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
TLI.isTypeLegal(VT)) {
// The incoming shuffle must be of the same type as the result of the
@@ -11183,13 +11895,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// The incoming shuffle must be of the same type as the result of the
@@ -11197,14 +11909,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
- SDValue SV0 = OtherSV->getOperand(0);
- SDValue SV1 = OtherSV->getOperand(1);
- bool HasSameOp0 = N1 == SV0;
- bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
- if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
- // Early exit.
- return SDValue();
-
+ SDValue SV0, SV1;
SmallVector<int, 4> Mask;
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
@@ -11216,14 +11921,49 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
continue;
}
+ SDValue CurrentVec;
if (Idx < (int)NumElts) {
+ // This shuffle index refers to the inner shuffle N0. Look up the inner
+ // shuffle mask to identify which vector is actually referenced.
Idx = OtherSV->getMaskElt(Idx);
- if (IsSV1Undef && Idx >= (int) NumElts)
- Idx = -1; // Propagate Undef.
- } else
- Idx = HasSameOp0 ? Idx - NumElts : Idx;
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+ : OtherSV->getOperand(1);
+ } else {
+ // This shuffle index references an element within N1.
+ CurrentVec = N1;
+ }
+
+ // Simple case where 'CurrentVec' is UNDEF.
+ if (CurrentVec.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Canonicalize the shuffle index. We don't know yet if CurrentVec
+ // will be the first or second operand of the combined shuffle.
+ Idx = Idx % NumElts;
+ if (!SV0.getNode() || SV0 == CurrentVec) {
+ // Ok. CurrentVec is the left hand side.
+ // Update the mask accordingly.
+ SV0 = CurrentVec;
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ if (SV1.getNode() && SV1 != CurrentVec)
+ return SDValue();
- Mask.push_back(Idx);
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
}
// Check if all indices in Mask are Undef. In case, propagate Undef.
@@ -11234,34 +11974,37 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (isUndefMask)
return DAG.getUNDEF(VT);
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
+
// Avoid introducing shuffles with illegal mask.
- if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (IsSV1Undef)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
- }
+ if (!TLI.isShuffleMaskLegal(Mask, VT)) {
+ // Compute the commuted shuffle mask and test again.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
- // Compute the commuted shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
- if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (IsSV1Undef)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
- // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ std::swap(SV0, SV1);
}
+
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
}
return SDValue();
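If the merged mask is illegal, commuting the new shuffle's operands gives a second chance: each defined index simply moves to the other operand's range, exactly as the loop above does. A sketch:

#include <vector>

// Commute a shuffle mask over two NumElts-wide operands: indices into the
// first operand move to the second and vice versa; undefs (-1) stay put.
void commuteMask(std::vector<int> &Mask, unsigned NumElts) {
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                 // propagate undef
    Idx = Idx < (int)NumElts ? Idx + (int)NumElts : Idx - (int)NumElts;
  }
}
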
@@ -11322,9 +12065,11 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
- // Let's see if the target supports this vector_shuffle.
+ // Let's see if the target supports this vector_shuffle and make sure
+ // we're not running after operation legalization where it may have
+ // custom lowered the vector shuffles.
EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8facbc2..1df4a1d 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,12 +40,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -62,7 +63,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -728,6 +728,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
CallLoweringInfo CLI;
+ CLI.setIsPatchPoint();
if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
return false;
@@ -1579,7 +1580,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()),
+ TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 86b9542..7e72dc6 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -133,16 +133,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
ImmutableCallSite CS(I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(CS);
+ TLI->ParseConstraints(TRI, CS);
for (size_t I = 0, E = Ops.size(); I != E; ++I) {
TargetLowering::AsmOperandInfo &Op = Ops[I];
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -273,6 +274,7 @@ void FunctionLoweringInfo::clear() {
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ StatepointStackSlots.clear();
PreferredExtendType.clear();
}
@@ -470,60 +472,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
}
}
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
- MachineBasicBlock *MBB) {
- // Inform the MachineModuleInfo of the personality for this landing pad.
- const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
- assert(CE->getOpcode() == Instruction::BitCast &&
- isa<Function>(CE->getOperand(0)) &&
- "Personality should be a function");
- MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
-
- // Gather all the type infos for this landing pad and pass them along to
- // MachineModuleInfo.
- std::vector<const GlobalValue *> TyInfo;
- unsigned N = I.getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N) {
- TyInfo.reserve(N - FirstCatch);
- for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- if (!FilterLength) {
- // Cleanup.
- MMI->addCleanup(MBB);
- } else {
- // Filter.
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addFilterTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- N = i;
- }
- }
-
- if (N > 2) {
- TyInfo.reserve(N - 2);
- for (unsigned j = 2; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a65f33e..93699a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type);
}
}
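For context, the alignment fallback around this change as a standalone sketch, with DL and Ty assumed in scope: the preferred alignment is used when the DataLayout reports one, and the allocation size stands in for vector types that report zero.

    unsigned Align = DL.getPrefTypeAlignment(Ty); // may be 0, e.g. for vectors
    if (Align == 0)
      Align = DL.getTypeAllocSize(Ty);            // fall back to the alloc size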
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5d17a5f..61c0a6f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag"
//===----------------------------------------------------------------------===//
-/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
/// eliminating value sizes the machine cannot handle (promoting small sizes to
/// large sizes or splitting up large values into small values) as well as
@@ -86,7 +86,7 @@ private:
void LegalizeLoadOps(SDNode *Node);
void LegalizeStoreOps(SDNode *Node);
- /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+ /// Some targets cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
/// the insert there, and then read the result back.
@@ -95,7 +95,7 @@ private:
SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
SDValue Idx, SDLoc dl);
- /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
@@ -200,7 +200,7 @@ public:
};
}
-/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
@@ -232,7 +232,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
SDValue
SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
@@ -260,7 +260,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
// smaller type.
- TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
@@ -286,7 +286,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
return Result;
}
-/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+/// Expands an unaligned store to 2 half-size stores.
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
const TargetLowering &TLI,
SelectionDAGLegalize *DAGLegalize) {
@@ -409,7 +409,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
-/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+/// Expands an unaligned load to 2 half-size loads.
static void
ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -561,8 +561,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
ChainResult = TF;
}
-/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
-/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
/// the insert there, and then read the result back.
SDValue SelectionDAGLegalize::
@@ -725,14 +725,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node),
- DAG, TLI, this);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -766,8 +765,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment,
- AAInfo);
+ NVT, isVolatile, isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -845,7 +843,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -946,7 +944,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// nice to have an effective generic way of getting these benefits...
// Until such a way is found, don't insist on promoting i1 here.
(SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
+ TargetLowering::Promote)) {
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
unsigned NewWidth = SrcVT.getStoreSizeInBits();
@@ -1058,7 +1057,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Ch;
} else {
bool isCustom = false;
- switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
+ switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
+ SrcVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
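Both hunks above reflect the same API change: extload legality is now keyed on the (extension kind, result type, memory type) triple rather than on the memory type alone. A sketch of the query, with illustrative types:

    // Can this target sign-extend-load v4i16 from memory directly into a
    // v4i32 register?
    if (TLI.getLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16) ==
        TargetLowering::Legal) {
      // Emit the extending load as a single node.
    }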
@@ -1080,36 +1080,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getDataLayout()->getABITypeAlignment(Ty);
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty);
if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Value, Chain);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
}
}
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) &&
- TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
- LD->getMemOperand());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
}
- Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Chain = Load.getValue(1);
- break;
}
assert(!SrcVT.isVector() &&
@@ -1133,8 +1132,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Result.getValueType(),
Result, DAG.getValueType(SrcVT));
else
- ValRes = DAG.getZeroExtendInReg(Result, dl,
- SrcVT.getScalarType());
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
Value = ValRes;
Chain = Result.getValue(1);
break;
@@ -1155,8 +1153,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
-/// LegalizeOp - Return a legal replacement for the given operation, with
-/// all legal operands.
+/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
@@ -1642,8 +1639,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
-/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target.
+/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
+/// target.
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
@@ -1757,7 +1754,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
return false;
}
-/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
/// The resultant code need not be legal.
@@ -1917,7 +1914,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
return true;
}
-/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// Expand a BUILD_VECTOR node on targets that don't
/// support the operation, but do support the resultant vector type.
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned NumElems = Node->getNumOperands();
@@ -2029,7 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
-// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
// and leave the Hi part unset.
@@ -2077,7 +2074,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// Generate a libcall taking the given operands as arguments
/// and returning a result of type RetVT.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps,
@@ -2108,7 +2105,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
return CallInfo.first;
}
-// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
@@ -2178,7 +2175,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
@@ -2194,8 +2191,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
return TLI.getLibcallName(LC) != nullptr;
}
-/// useDivRem - Only issue divrem libcall if both quotient and remainder are
-/// needed.
+/// Only issue divrem libcall if both quotient and remainder are needed.
static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
// The other use might have been replaced with a divrem already.
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
@@ -2220,8 +2216,7 @@ static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
return false;
}
-/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
-/// pairs.
+/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
@@ -2283,7 +2278,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
-/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+/// Return true if sincos libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -2297,8 +2292,8 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
return TLI.getLibcallName(LC) != nullptr;
}
-/// canCombineSinCosLibcall - Return true if sincos libcall is available and
-/// can be used to combine sin and cos.
+/// Return true if sincos libcall is available and can be used to combine sin
+/// and cos.
static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
const TargetMachine &TM) {
if (!isSinCosLibcallAvailable(Node, TLI))
@@ -2311,8 +2306,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
return true;
}
-/// useSinCos - Only issue sincos libcall if both sin and cos are
-/// needed.
+/// Only issue sincos libcall if both sin and cos are needed.
static bool useSinCos(SDNode *Node) {
unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
? ISD::FCOS : ISD::FSIN;
@@ -2330,8 +2324,7 @@ static bool useSinCos(SDNode *Node) {
return false;
}
-/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
-/// pairs.
+/// Issue libcalls to sincos to compute sin / cos pairs.
void
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
@@ -2396,7 +2389,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
MachinePointerInfo(), false, false, false, 0));
}
-/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
@@ -2594,7 +2587,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
-/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// *INT_TO_FP operation of the specified operand when the target requests that
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
@@ -2636,7 +2629,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
dl, NewInTy, LegalOp));
}
-/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// FP_TO_*INT operation of the specified operand when the target requests that
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
@@ -2680,8 +2673,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
-/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
-///
+/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT);
@@ -2727,8 +2719,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
}
}
-/// ExpandBitCount - Expand the specified bitcount instruction into operations.
-///
+/// Expand the specified bitcount instruction into operations.
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
SDLoc dl) {
switch (Opc) {
@@ -3528,6 +3519,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
@@ -3554,6 +3548,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FP_TO_FP16: {
+ if (!TM.Options.UseSoftFloat && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
+ break;
+ }
+ }
+
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
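The shortcut above is gated on UnsafeFPMath because double rounding f64 -> f32 -> f16 can differ from a single f64 -> f16 rounding in rare cases. The expansion in isolation, with dl, DAG, and Op assumed in scope:

    // f64/f80 -> f16 in two steps: round to f32, then convert f32 -> f16.
    SDValue F32 = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
                              DAG.getIntPtrConstant(0));
    SDValue F16 = DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, F32);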
@@ -4319,8 +4328,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplaceNode(Node, Results.data());
}
-// SelectionDAG::Legalize - This is the entry point for the file.
-//
+/// This is the entry point for the file.
void SelectionDAG::Legalize() {
AssignTopologicalOrder();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4591e79..b596715 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl).first;
+ &Op, 1, Signed, dl).first;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b73bb0a..5507c70 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -66,6 +66,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
@@ -454,6 +455,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+
+ SDValue Mask = N->getMask();
+ EVT NewMaskVT = getSetCCResultType(NVT);
+ if (NewMaskVT != N->getMask().getValueType())
+ Mask = PromoteTargetBoolean(Mask, NewMaskVT);
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMemOperand(), ISD::SEXTLOAD);
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
@@ -825,6 +844,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1091,6 +1114,64 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+                                              unsigned OpNo) {
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ SDValue DataOp = N->getValue();
+ EVT DataVT = DataOp.getValueType();
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDLoc dl(N);
+
+ bool TruncateStore = false;
+ if (!TLI.isTypeLegal(DataVT)) {
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
+    } else {
+ assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
+ "Unexpected data legalization in MSTORE");
+ DataOp = GetWidenedVector(DataOp);
+
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ EVT BoolVT = getSetCCResultType(DataOp.getValueType());
+
+        // We can't use ModifyToType() here because the extra mask lanes
+        // must be filled with zeroes, not undef.
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ }
+  } else
+ Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+ N->getMemoryVT(), N->getMemOperand(),
+ TruncateStore);
+}
+
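The mask-widening idiom used above, and repeated in the vector-type legalizer later in this patch, pads the extra lanes with zero ("off") so that widened lanes never touch memory. In isolation, with all names assumed in scope:

    unsigned NumConcat = WidenNumElts / MaskNumElts;
    // Start every chunk as an all-zero mask, then keep the real mask as the
    // first chunk.
    SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, MaskVT));
    Ops[0] = Mask;
    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);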
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+                                             unsigned OpNo) {
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
@@ -2936,17 +3017,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- NewMask.push_back(SV->getMaskElt(i));
- }
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
- return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index bd7dacf..ebf6b28 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// The target didn't want to custom lower it after all.
return false;
+ // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
+ // provide the same kind of custom splitting behavior.
+ if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
+ // We've legalized a return type by splitting it. If there is a chain,
+ // replace that too.
+ SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
+ if (N->getNumValues() > 1)
+ ReplaceValueWith(SDValue(N, 1), Results[2]);
+ return true;
+ }
+
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 30f412b..cef3fc9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -240,6 +240,7 @@ private:
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SDIV(SDNode *N);
@@ -285,6 +286,8 @@ private:
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -578,6 +581,7 @@ private:
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -594,6 +598,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
@@ -627,6 +632,7 @@ private:
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
@@ -653,6 +659,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b5af7b7..03c2734 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -200,12 +200,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
- switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) {
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
+ LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ if (Lowered == Result)
+ return TranslateLegalizeResults(Op, Lowered);
Changed = true;
if (Lowered->getNumValues() != Op->getNumValues()) {
// This expanded to something other than the load. Assume the
@@ -231,9 +234,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
- case TargetLowering::Custom:
- Changed = true;
- return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Custom: {
+ SDValue Lowered = TLI.LowerOperation(Result, DAG);
+ Changed = Lowered != Result;
+ return TranslateLegalizeResults(Op, Lowered);
+ }
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
@@ -389,7 +394,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
if (Op.getOperand(j)
.getValueType()
.getVectorElementType()
- .isFloatingPoint())
+ .isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
else
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
@@ -398,8 +404,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
- if (VT.isFloatingPoint() ||
- (VT.isVector() && VT.getVectorElementType().isFloatingPoint()))
+ if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
else
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
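Why both sides are now checked: if the promoted type's elements are not floating point, FP_EXTEND/FP_ROUND would be malformed and a bit-preserving cast is required. A condensed sketch of the vector result conversion, with illustrative types:

    bool FPRound =
        VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint();
    Op = FPRound ? DAG.getNode(ISD::FP_ROUND, dl, VT, Op,
                               DAG.getIntPtrConstant(0))
                 : DAG.getNode(ISD::BITCAST, dl, VT, Op);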
@@ -509,7 +516,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment(),
+ LD->isInvariant(),
+ MinAlign(LD->getAlignment(), Offset),
LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
@@ -521,7 +529,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
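MinAlign is the substantive fix in this file: the alignment of base + Offset is only the largest power of two that divides both the base alignment and the offset. A worked value:

    // Base aligned to 16 bytes, element at byte offset 4: the element can
    // only be assumed 4-byte aligned.
    unsigned EltAlign = MinAlign(/*A=*/16, /*B=*/4); // == 4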
@@ -553,9 +562,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
WideIdx++;
- Offset -= WideBits;
- if (Offset > 0) {
- ShAmt = DAG.getConstant(SrcEltBits - Offset,
+ BitOffset -= WideBits;
+ if (BitOffset > 0) {
+ ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
TLI.getShiftAmountTy(WideVT));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
@@ -592,7 +601,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
@@ -651,7 +660,8 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
+ AAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 27f63d2..63671f7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -597,6 +597,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
+ case ISD::MLOAD:
+ SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+ break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
@@ -979,6 +982,67 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MLD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Ch = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ SDValue Mask = MLD->getMask();
+ unsigned Alignment = MLD->getOriginalAlignment();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
+
+ // if Alignment is equal to the vector size,
+ // take the half of it for the second part
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MLD->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0 = MLD->getSrc0();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ExtType);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ExtType);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MLD, 1), Ch);
+}
+
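A worked example of the split-alignment rule used here and in the masked-store split below: a v8i32 access aligned to its full 32-byte size splits into two v4i32 halves, and the high half, which starts 16 bytes in, can only claim half the alignment.

    unsigned Alignment = 32;                           // == getSizeInBits()/8
    unsigned SecondHalfAlignment =
        (Alignment == 32) ? Alignment / 2 : Alignment; // == 16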
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -1234,6 +1298,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
@@ -1395,6 +1462,58 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT, false, false, false, 0);
}
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ GetSplitVector(Data, DataLo, DataHi);
+ SDValue MaskLo, MaskHi;
+ GetSplitVector(Mask, MaskLo, MaskHi);
+
+  // If the alignment equals the whole vector's size in bytes, the second
+  // half of the split store can only be assumed to have half that alignment.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ N->isTruncatingStore());
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ N->isTruncatingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
@@ -1599,6 +1718,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
+ case ISD::MLOAD:
+ Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
case ISD::ADD:
case ISD::AND:
@@ -2289,6 +2411,44 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return Result;
}
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                         N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue Src0 = GetWidenedVector(N->getSrc0());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ EVT BoolVT = getSetCCResultType(WidenVT);
+
+    // We can't use ModifyToType() here because the extra mask lanes must
+    // be filled with zeroes, not undef.
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+
+ SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, Src0, N->getMemoryVT(),
+ N->getMemOperand(), ExtType);
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
@@ -2434,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::ANY_EXTEND:
@@ -2632,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(StVal);
+ SDLoc dl(N);
+
+ if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ // The mask should be widened as well
+ EVT BoolVT = getSetCCResultType(WideVal.getValueType());
+    // We can't use ModifyToType() here because the extra mask lanes must
+    // be filled with zeroes, not undef.
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ assert(Mask.getValueType().getVectorNumElements() ==
+ WideVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ false);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 8b9f618..3853ada 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
// Helper for AddGlue to clone node operands.
-static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
- SmallVectorImpl<EVT> &VTs,
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
SDValue ExtraOper = SDValue()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
- Ops.push_back(N->getOperand(I));
-
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
if (ExtraOper.getNode())
Ops.push_back(ExtraOper);
@@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
- SmallVector<EVT, 4> VTs;
SDNode *GlueDestNode = Glue.getNode();
// Don't add glue from a node to itself.
@@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
- for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
+ SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
if (AddGlue)
VTs.push_back(MVT::Glue);
@@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
!N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
"expected an unused glue value");
- SmallVector<EVT, 4> VTs;
- for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
- CloneNodeWithValues(N, DAG, VTs);
+ CloneNodeWithValues(N, DAG,
+ makeArrayRef(N->value_begin(), N->getNumValues() - 1));
}
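The cleanups in this file swap manual copy loops for iterator-range construction. The idiom in isolation, with N assumed to be an SDNode*:

    SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());   // all operands
    SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); // all result VTs
    // View every result type except the trailing glue, without copying:
    ArrayRef<EVT> NoGlue = makeArrayRef(N->value_begin(),
                                        N->getNumValues() - 1);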
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
@@ -551,6 +541,14 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
NodeNumDefs = 0;
return;
}
+ if (POpc == TargetOpcode::PATCHPOINT &&
+ Node->getValueType(0) == MVT::Other) {
+ // PATCHPOINT is defined to have one result, but it might really have none
+ // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+ // real definition.
+ NodeNumDefs = 0;
+ return;
+ }
unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
// Some instructions define regs that are not represented in the selection DAG
// (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7961e66..9466f4d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
-ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
- return ISD::ANY_EXTEND;
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
@@ -1484,6 +1484,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < (int)NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
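A worked example of BlendSplat with hypothetical values: suppose N1 is build_vector x, x, undef, x (a splat of x whose lane 2 is undef) and the incoming mask is <2, 0, 3, 1>.

    int Before[4] = {2, 0, 3, 1};    // prior to BlendSplat(N1BV, /*Offset=*/0)
    // Lane 0 reads N1[2], which is undef, so it becomes -1 (undef).
    // Lanes 1 and 3 read defined splat lanes and are themselves defined in
    // N1, so they are rewritten to their identity indices 1 and 3.
    // Lane 2 reads a defined lane but N1[2] itself is undef, so it is kept.
    int After[4] = {-1, 1, 3, 3};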
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
@@ -1513,9 +1541,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return getUNDEF(VT);
// If Identity shuffle return that node.
- bool Identity = true;
+ bool Identity = true, AllSame = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
@@ -1537,18 +1566,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (Splat && Splat.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (V.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements())
+ if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+ EVT BuildVT = BV->getValueType(0);
+ SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
}
}
@@ -2323,6 +2369,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
+ case ISD::EXTRACT_ELEMENT: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ const unsigned Index =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+  // Remove the low part of the known-bits mask.
+ KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+ KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+  // Remove the high part of the known-bits mask.
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
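A worked example for the new EXTRACT_ELEMENT case, with hypothetical values: an i64 operand whose high 32 bits are known zero, viewed as two i32 elements (element 0 is the low half).

    APInt KnownZero(64, 0xFFFFFFFF00000000ULL); // high 32 bits known zero
    unsigned Index = 1, BitWidth = 32;          // extract the high element
    KnownZero = KnownZero.getHiBits(64 - Index * BitWidth).trunc(BitWidth);
    // KnownZero == 0xFFFFFFFF: every bit of the high element is known zero.
    // With Index = 0 the same steps leave no known-zero bits.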
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2522,6 +2583,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueType().getSizeInBits();
+ const int Items =
+ Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+ // Get reverse index (starting from 1), Op1 value indexes elements from
+ // little end. Sign starts at big end.
+ const int rIndex = Items - 1 -
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+  // If the sign portion ends in our element, the subtraction gives the
+  // correct result. Otherwise it yields a negative or > bitwidth result,
+  // so clamp it to [0, BitWidth].
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
}
// If we are looking at the loaded value of the SDNode.
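And the matching arithmetic for the ComputeNumSignBits case above: take an i128 operand with 70 known sign bits, split into two i64 elements.

    // Items = 128/64 = 2. Extracting the high element (Op1 = 1) gives
    // rIndex = 0, so min(70 - 0*64, 64) = 64 sign bits, i.e. all of them.
    // Extracting the low element (Op1 = 0) gives rIndex = 1, so only
    // 70 - 1*64 = 6 sign bits survive; the max(..., 0) clamp covers masks
    // whose sign run ends entirely above the extracted element.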
@@ -2683,6 +2759,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(apf, VT);
}
case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2756,7 +2834,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstant(api, VT);
}
case ISD::BITCAST:
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
@@ -3379,8 +3459,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Perform trivial constant folding.
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
- if (SV.getNode()) return SV;
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()))
+ return SV;
// Canonicalize constant to RHS if commutative.
if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
@@ -3564,7 +3645,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
+ if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
return getConstantFP(V1, VT);
}
break;
@@ -3913,9 +3994,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize =
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4028,8 +4107,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4123,8 +4201,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4214,11 +4291,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
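The same null check is applied to memcpy, memmove, and memset in this file: a target without a custom TargetSelectionDAGInfo expansion now simply falls through. Schematically, with the arguments elided:

    if (TSI) {
      SDValue Result = TSI->EmitTargetCodeForMemcpy(/* DAG, dl, operands */);
      if (Result.getNode())
        return Result; // the target supplied a custom expansion
    }
    // Otherwise fall through to generic load/store or libcall lowering.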
@@ -4280,10 +4359,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4332,10 +4413,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
- Size, Align, isVol, DstPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// Emit a library call.
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
@@ -4680,10 +4763,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
- "Cannot use trunc store to convert to or from a vector!");
+ "Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
- "Cannot use trunc store to change the number of vector elements!");
+ "Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
@@ -4917,6 +5000,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
return SDValue(N, 0);
}
+SDValue
+SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4, VTs,
+ ExtTy, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, bool isTrunc) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4,
+ VTs, isTrunc, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
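// A minimal usage sketch for the two builders above, condensed from the
// visitMaskedLoad caller added later in this same patch (variable names
// illustrative, not additional patch content):
//   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
//       MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
//       VT.getStoreSize(), Alignment, AAInfo);
//   SDValue Load = DAG.getMaskedLoad(VT, dl, Chain, Ptr, Mask, Src0, VT,
//                                    MMO, ISD::NON_EXTLOAD);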
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
@@ -6495,11 +6633,25 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
- return true;
-
+ // Handle X + C.
+ if (isBaseWithConstantOffset(Loc)) {
+ int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc) {
+ // If the base location is a simple address with no offset itself, then
+ // the second load's first add operand should be the base address.
+ if (LocOffset == Dist * (int)Bytes)
+ return true;
+ } else if (isBaseWithConstantOffset(BaseLoc)) {
+ // The base location itself has an offset, so subtract that value from the
+ // second load's offset before comparing to distance * size.
+ int64_t BOffset =
+ cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+ if ((LocOffset - BOffset) == Dist * (int)Bytes)
+ return true;
+ }
+ }
+ }
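  // A worked instance of the new case (values hypothetical): with
  // BaseLoc == (add %p, 8), Loc == (add %p, 16), Bytes == 8 and Dist == 1,
  // LocOffset - BOffset == 16 - 8 == Dist * Bytes, so the two loads are now
  // recognized as consecutive; the old code missed this because
  // Loc.getOperand(0) is %p rather than BaseLoc itself.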
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8f582f1..097b618 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -16,9 +16,11 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
@@ -46,6 +48,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -54,7 +58,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -564,6 +567,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
} else if (NumParts > 0) {
// If the intermediate type was expanded, split the value into
// legal parts.
+ assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
@@ -865,7 +869,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getSubtarget().getDataLayout();
+ DL = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -884,6 +888,7 @@ void SelectionDAGBuilder::clear() {
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
+ StatepointLowering.clear();
}
/// clearDanglingDebugInfo - Clear the dangling debug information
@@ -1234,24 +1239,29 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ const Function *F = I.getParent()->getParent();
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ LLVMContext &Context = F->getContext();
+ bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg);
- const Function *F = I.getParent()->getParent();
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
+ for (unsigned j = 0; j != NumValues; ++j) {
+ EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(Context, VT);
+ MVT PartVT = TLI.getRegisterType(Context, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -1259,8 +1269,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg))
+ if (RetInReg)
Flags.setInReg();
// Propagate extension type if any
@@ -1405,7 +1414,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
- Condition = ISD::SETEQ; // silence warning.
+ (void)Condition; // silence warning.
llvm_unreachable("Unknown compare instruction");
}
@@ -1947,7 +1956,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
Reg, VT);
SDValue Cmp;
- unsigned PopCount = CountPopulation_64(B.Mask);
+ unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
@@ -1959,7 +1968,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE);
+ DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT,
@@ -2062,10 +2071,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
- Ops[0] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
- getCurSDLoc(), ValueVTs[0]);
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, TLI.getPointerTy());
+ }
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
@@ -2077,6 +2090,27 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
+unsigned
+SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadBB) {
+ SDValue Chain = getControlRoot();
+
+ // Get the typeid that we will dispatch on later.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV);
+ SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel);
+
+ // Branch to the main landing pad block.
+ MachineBasicBlock *ClauseMBB = FuncInfo.MBB;
+ ClauseMBB->addSuccessor(LPadBB);
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain,
+ DAG.getBasicBlock(LPadBB)));
+ return VReg;
+}
+
/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -2363,17 +2397,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock* SwitchBB) {
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // Figure out which block is immediately after the current one.
- MachineFunction::iterator BBI = CR.CaseBB;
- ++BBI;
-
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
@@ -2395,6 +2420,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
DEBUG(dbgs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
@@ -2404,13 +2430,17 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
"Invalid case distance");
// Use volatile double here to avoid excess precision issues on some hosts,
// e.g. that use 80-bit X87 registers.
+ // Only consider the density of sub-ranges that actually have sufficient
+ // entries to be lowered as a jump table.
volatile double LDensity =
- (double)LSize.roundToDouble() /
- (LEnd - First + 1ULL).roundToDouble();
+ LSize.ult(TLI.getMinimumJumpTableEntries())
+ ? 0.0
+ : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble();
volatile double RDensity =
- (double)RSize.roundToDouble() /
- (Last - RBegin + 1ULL).roundToDouble();
- volatile double Metric = Range.logBase2()*(LDensity+RDensity);
+ RSize.ult(TLI.getMinimumJumpTableEntries())
+ ? 0.0
+ : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble();
+ volatile double Metric = Range.logBase2() * (LDensity + RDensity);
// Should always split in some non-trivial place
DEBUG(dbgs() <<"=>Step\n"
<< "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
@@ -2427,13 +2457,25 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
RSize -= J->size();
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (areJTsAllowed(TLI)) {
- // If our case is dense we *really* should handle it earlier!
- assert((FMetric > 0) && "Should handle dense range earlier!");
- } else {
+ if (FMetric == 0 || !areJTsAllowed(TLI))
Pivot = CR.Range.first + Size/2;
- }
+ splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB);
+ return true;
+}
+
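// A worked example of the new density guard (numbers hypothetical): for
// cases {0,1,2,3,100} with getMinimumJumpTableEntries() == 4, a pivot after
// 3 gives LSize == 4 over a span of 4, so LDensity == 1.0, while RSize == 1
// falls below the jump-table minimum and now contributes 0.0 instead of its
// raw density. Sub-ranges too small to become jump tables no longer inflate
// the metric, which is why FMetric may legitimately end up 0 and the
// midpoint fallback above is taken.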
+void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
@@ -2446,10 +2488,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// LHS's Case Value, and that Case Value is exactly one less than the
// Pivot's Value, then we can branch directly to the LHS's Target,
// rather than creating a leaf node for it.
- if ((LHSR.second - LHSR.first) == 1 &&
- LHSR.first->High == CR.GE &&
+ if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
cast<ConstantInt>(C)->getValue() ==
- (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
TrueBB = LHSR.first->BB;
} else {
TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2466,12 +2507,12 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// the current Case Value, rather than emitting a RHS leaf node for it.
if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
cast<ConstantInt>(RHSR.first->Low)->getValue() ==
- (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
FalseBB = RHSR.first->BB;
} else {
FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
CurMF->insert(BBI, FalseBB);
- WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+ WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
// Put SV in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(SV);
@@ -2486,8 +2527,6 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
-
- return true;
}
/// handleBitTestsSwitchCase - if current case range has few destinations and
@@ -2514,15 +2553,14 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
return false;
size_t numCmps = 0;
- for (CaseItr I = CR.Range.first, E = CR.Range.second;
- I!=E; ++I) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
// Single case counts one, case range - two.
numCmps += (I->Low == I->High ? 1 : 2);
}
// Count unique destinations
SmallSet<MachineBasicBlock*, 4> Dests;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
Dests.insert(I->BB);
if (Dests.size() > 3)
// Don't bother the code below if there are too many unique destinations
@@ -2629,9 +2667,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // Start with "simple" cases
- for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
- i != e; ++i) {
+ // Start with "simple" cases.
+ for (SwitchInst::ConstCaseIt i : SI.cases()) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
@@ -2694,32 +2731,58 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = nullptr;
+ if (SwitchMBB + 1 != FuncInfo.MF->end())
+ NextBlock = SwitchMBB + 1;
+
+ // Create a vector of Cases, sorted so that we can efficiently create a binary
+ // search tree from them.
+ CaseVector Cases;
+ Clusterify(Cases, SI);
+
+ // Get the default destination MBB.
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
- // If there is only the default destination, branch to it if it is not the
- // next basic block. Otherwise, just fall through.
- if (!SI.getNumCases()) {
- // Update machine-CFG edges.
+ if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
+ !Cases.empty()) {
+ // Replace an unreachable default destination with the most popular case
+ // destination.
+ DenseMap<const BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ const BasicBlock *MaxBB = nullptr;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ if (++Popularity[BB] > MaxPop) {
+ MaxPop = Popularity[BB];
+ MaxBB = BB;
+ }
+ }
- // If this is not a fall-through branch, emit the branch.
+ // Set new default.
+ assert(MaxPop > 0);
+ assert(MaxBB);
+ Default = FuncInfo.MBBMap[MaxBB];
+
+ // Remove cases that were pointing to the destination that is now the
+ // default.
+ Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
+ [&](const Case &C) { return C.BB == Default; }),
+ Cases.end());
+ }
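  // A small worked example (hypothetical IR): for a switch with cases
  // {1 -> A, 2 -> A, 3 -> B} whose default block begins with unreachable,
  // A wins with MaxPop == 2 and becomes the new default, and the two cases
  // that targeted A are erased, leaving only {3 -> B} to be lowered below.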
+
+ // If there is only the default destination, go there directly.
+ if (Cases.empty()) {
+ // Update machine-CFG edges.
SwitchMBB->addSuccessor(Default);
- if (Default != NextBlock)
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
- MVT::Other, getControlRoot(),
- DAG.getBasicBlock(Default)));
+ // If this is not a fall-through branch, emit the branch.
+ if (Default != NextBlock) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Default)));
+ }
return;
}
- // If there are any non-default case statements, create a vector of Cases
- // representing each one, and sort the vector so that we can efficiently
- // create a binary search tree from them.
- CaseVector Cases;
- Clusterify(Cases, SI);
-
- // Get the Value to be switched on and default basic blocks, which will be
- // inserted into CaseBlock records, representing basic blocks in the binary
- // search tree.
+ // Get the Value to be switched on.
const Value *SV = SI.getCondition();
// Push the initial CaseRec onto the worklist
@@ -3613,6 +3676,74 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Value *PtrOperand = I.getArgOperand(1);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+ MMO, false);
+ DAG.setRoot(StoreNode);
+ setValue(&I, StoreNode);
+}
+
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Value *PtrOperand = I.getArgOperand(0);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SDValue InChain = DAG.getRoot();
+ if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(PtrOperand,
+ AA->getTypeStoreSize(I.getType()),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ InChain = DAG.getEntryNode();
+ }
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+ ISD::NON_EXTLOAD);
+ SDValue OutChain = Load.getValue(1);
+ DAG.setRoot(OutChain);
+ setValue(&I, Load);
+}
+
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -4460,11 +4591,10 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
+ if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
- CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
@@ -4623,7 +4753,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
- SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
return nullptr;
@@ -4632,7 +4763,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getValue(RegValue).getOperand(0);
- SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return nullptr;
@@ -4642,6 +4774,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4662,6 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4679,6 +4813,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4914,6 +5049,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return nullptr;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return nullptr;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5459,6 +5600,78 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
visitPatchpoint(&I);
return nullptr;
}
+ case Intrinsic::experimental_gc_statepoint: {
+ visitStatepoint(I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_result_int:
+ case Intrinsic::experimental_gc_result_float:
+ case Intrinsic::experimental_gc_result_ptr:
+ case Intrinsic::experimental_gc_result: {
+ visitGCResult(I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_relocate: {
+ visitGCRelocate(I);
+ return nullptr;
+ }
+ case Intrinsic::instrprof_increment:
+ llvm_unreachable("instrprof failed to lower an increment");
+
+ case Intrinsic::frameallocate: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Do the allocation and map it as a normal value.
+ // FIXME: Maybe we should add this to the alloca map so that we don't have
+ // to register allocate it?
+ uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size);
+ MVT PtrVT = TLI.getPointerTy(0);
+ SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT);
+ setValue(&I, FIVal);
+
+ // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is
+ // the same on all targets.
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::FRAME_ALLOC))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(Alloc);
+
+ return nullptr;
+ }
+
+ case Intrinsic::framerecover: {
+ // i8* @llvm.framerecover(i8* %fn, i8* %fp)
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT PtrVT = TLI.getPointerTy(0);
+
+ // Get the symbol that defines the frame offset.
+ Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
+
+ // Create a TargetExternalSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ StringRef Name = FrameAllocSym->getName();
+ assert(Name.size() == strlen(Name.data()) && "not null terminated");
+ SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ setValue(&I, Add);
+
+ return nullptr;
+ }
+ case Intrinsic::eh_begincatch:
+ case Intrinsic::eh_endcatch:
+ llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
}
}
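// How the frameallocate/framerecover pair above cooperates (a condensed
// restatement, not additional patch content): both sides derive one MC
// symbol from the parent function's name, so the offset is resolved at MC
// time with no PC-relative lowering.
//   parent (frameallocate):  getOrCreateFrameAllocSymbol(MF.getName())
//   handler (framerecover):  getOrCreateFrameAllocSymbol(Fn->getName())
// The handler then wraps the symbol in a TargetExternalSymbol and adds the
// recovered offset to the frame pointer it was passed.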
@@ -5491,9 +5704,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
CLI.setChain(getRoot());
}
-
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
- std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
@@ -6191,9 +6403,10 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
+ OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
@@ -6289,8 +6502,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfoVector ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI.ParseConstraints(CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS);
bool hasMemory = false;
@@ -6382,12 +6595,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
@@ -6848,7 +7062,8 @@ std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
bool UseVoidTy,
- MachineBasicBlock *LandingPad) {
+ MachineBasicBlock *LandingPad,
+ bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6871,7 +7086,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
- .setDiscardResult(CS->use_empty());
+ .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
return lowerInvokable(CLI, LandingPad);
}
@@ -7003,7 +7218,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
std::pair<SDValue, SDValue> Result =
lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
- LandingPad);
+ LandingPad, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -7051,8 +7266,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
- for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
- Ops.push_back(*i);
+ Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this);
@@ -7251,11 +7465,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7304,6 +7515,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
+
+ if (NeedsRegBlock && Value == NumValues - 1)
+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
@@ -7460,7 +7674,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
@@ -7518,11 +7733,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7537,6 +7749,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
+ if (NeedsRegBlock && Value == NumValues - 1)
+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize();
}
}
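// Net effect here and in LowerCallTo above: InConsecutiveRegsLast is now
// set only on the very last part generated for the last value, rather than
// on every part of the last value, which is presumably what consumers of a
// register block expect of its terminator flag.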
@@ -7671,7 +7885,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
assert(i == InVals.size() && "Argument register count mismatch!");
// Finally, if the target has anything special to do, allow it to do so.
- // FIXME: this should insert code into the DAG!
EmitFunctionEntryCode();
}
@@ -7762,6 +7975,7 @@ MachineBasicBlock *
SelectionDAGBuilder::StackProtectorDescriptor::
AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
+ bool IsLikely,
MachineBasicBlock *SuccMBB) {
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
@@ -7771,6 +7985,7 @@ AddSuccessorMBB(const BasicBlock *BB,
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(SuccMBB);
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
return SuccMBB;
}
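// Intent of the new weight (assuming BranchProbabilityInfo's
// stack-protector weights): the success path is marked overwhelmingly
// likely and the failure path unlikely, as in the two header calls below:
//   SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
//   FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);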
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f74e652..ad7411f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,11 +14,13 @@
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
@@ -115,6 +117,10 @@ public:
/// get simple disambiguation between loads without worrying about alias
/// analysis.
SmallVector<SDValue, 8> PendingLoads;
+
+ /// State used while lowering a statepoint sequence (gc_statepoint,
+ /// gc_relocate, and gc_result). See StatepointLowering.h/.cpp for details.
+ StatepointLoweringState StatepointLowering;
private:
/// PendingExports - CopyToReg nodes that copy values to virtual registers
@@ -417,8 +423,8 @@ private:
assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
"already initialized!");
ParentMBB = MBB;
- SuccessMBB = AddSuccessorMBB(BB, MBB);
- FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+ SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
if (!Guard)
Guard = StackProtCheckCall.getArgOperand(0);
}
@@ -487,9 +493,10 @@ private:
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created.
+ /// block will be created. Assign a large weight if IsLikely is true.
MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
+ bool IsLikely,
MachineBasicBlock *SuccMBB = nullptr);
};
@@ -612,6 +619,13 @@ public:
N = NewN;
}
+ void removeValue(const Value *V) {
+ // This supports a hack in lowerCallFromStatepoint and should be removed
+ // once that hack is resolved.
+ if (NodeMap.count(V))
+ NodeMap.erase(V);
+ }
+
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
assert(!N.getNode() && "Already set a value for this node!");
@@ -640,12 +654,15 @@ public:
unsigned NumArgs,
SDValue Callee,
bool UseVoidTy = false,
- MachineBasicBlock *LandingPad = nullptr);
+ MachineBasicBlock *LandingPad = nullptr,
+ bool IsPatchPoint = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+ // This function is responsible for the whole statepoint lowering process.
+ void LowerStatepoint(ImmutableStatepoint Statepoint);
private:
std::pair<SDValue, SDValue> lowerInvokable(
TargetLowering::CallLoweringInfo &CLI,
@@ -673,6 +690,8 @@ private:
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock *SwitchBB);
+ void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
+ const Value *SV, MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
@@ -699,6 +718,8 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
+ unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadMBB);
private:
// These all get lowered before this pass.
@@ -756,6 +777,8 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I);
+ void visitMaskedStore(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
void visitAtomicRMW(const AtomicRMWInst &I);
void visitFence(const FenceInst &I);
@@ -784,6 +807,11 @@ private:
void visitPatchpoint(ImmutableCallSite CS,
MachineBasicBlock *LandingPad = nullptr);
+ // These three are implemented in StatepointLowering.cpp
+ void visitStatepoint(const CallInst &I);
+ void visitGCRelocate(const CallInst &I);
+ void visitGCResult(const CallInst &I);
+
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c9f6cff..17eff94 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -187,6 +187,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv";
case ISD::FMA: return "fma";
+ case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
@@ -269,6 +270,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 79109b7..5e867cf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -19,10 +19,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -40,6 +42,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,7 +50,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -181,6 +183,10 @@ UseMBPI("use-mbpi",
cl::init(true), cl::Hidden);
#ifndef NDEBUG
+static cl::opt<std::string>
+FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
+ cl::desc("Only display the basic block whose name "
+ "matches this for all view-*-dags options"));
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the first "
@@ -345,7 +351,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
- initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -359,7 +366,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -372,7 +379,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Loop for blocks with phi nodes.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
PHINode *PN = dyn_cast<PHINode>(BB->begin());
@@ -396,8 +403,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
continue;
// Okay, we have to split this edge.
- SplitCriticalEdge(Pred->getTerminator(),
- GetSuccessorNumber(Pred, BB), SDISel, true);
+ SplitCriticalEdge(
+ Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
+ CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -429,12 +437,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
- LibInfo = &getAnalysis<TargetLibraryInfo>();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
@@ -650,6 +658,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
std::string BlockName;
int BlockNumber = -1;
(void)BlockNumber;
+ bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+ FilterDAGBasicBlockName ==
+ FuncInfo->MBB->getBasicBlock()->getName().str());
+#endif
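// Usage sketch for the new flag (debug builds only; invocation
// illustrative):
//   llc -view-isel-dags -filter-view-dags=entry foo.ll
// pops up only the DAGs for the block named "entry"; every viewGraph call
// below is now gated on MatchFilterBB.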
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
@@ -663,7 +677,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+ if (ViewDAGCombine1 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine1 input for " + BlockName);
// Run the DAG combiner in pre-legalize mode.
{
@@ -676,8 +691,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
- if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
- BlockName);
+ if (ViewLegalizeTypesDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize-types input for " + BlockName);
bool Changed;
{
@@ -691,7 +706,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
- if (ViewDAGCombineLT)
+ if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
@@ -717,7 +732,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- if (ViewDAGCombineLT)
+ if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
@@ -731,7 +746,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
}
- if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+ if (ViewLegalizeDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize input for " + BlockName);
{
NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
@@ -741,7 +757,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+ if (ViewDAGCombine2 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
{
@@ -755,7 +772,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
- if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+ if (ViewISelDAGs && MatchFilterBB)
+ CurDAG->viewGraph("isel input for " + BlockName);
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
@@ -767,7 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+ if (ViewSchedDAGs && MatchFilterBB)
+ CurDAG->viewGraph("scheduler input for " + BlockName);
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
@@ -777,7 +796,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
- if (ViewSUnitDAGs) Scheduler->viewGraph();
+ if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
@@ -892,6 +911,8 @@ void SelectionDAGISel::DoInstructionSelection() {
void SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
+ const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -903,8 +924,73 @@ void SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
+ // If this is an MSVC-style personality function, we need to split the landing
+ // pad into several BBs.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
+ MF->getMMI().addPersonality(
+ MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts()));
+ if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) {
+ // Make virtual registers and a series of labels that fill in values for the
+ // clauses.
+ auto &RI = MF->getRegInfo();
+ FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC);
+
+ // Get all invoke BBs that will unwind into the clause BBs.
+ SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
+ MBB->pred_end());
+
+ // Emit separate machine basic blocks with separate labels for each clause
+ // before the main landing pad block.
+ MachineInstrBuilder SelectorPHI = BuildMI(
+ *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI),
+ FuncInfo->ExceptionSelectorVirtReg);
+ for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
+ // Skip filter clauses, we can't implement them yet.
+ if (LPadInst->isFilter(I))
+ continue;
+
+ MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
+ MF->insert(MBB, ClauseBB);
+
+ // Add the edge from the invoke to the clause.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->addSuccessor(ClauseBB);
+
+ // Mark the clause as a landing pad or MI passes will delete it.
+ ClauseBB->setIsLandingPad();
+
+ GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
+
+ // Start the BB with a label.
+ MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
+ BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
+ .addSym(ClauseLabel);
+
+ // Construct a simple BB that defines a register with the typeid constant.
+ FuncInfo->MBB = ClauseBB;
+ FuncInfo->InsertPt = ClauseBB->end();
+ unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Add the typeid virtual register to the phi in the main landing pad.
+ SelectorPHI.addReg(VReg).addMBB(ClauseBB);
+ }
+
+ // Remove the edge from the invoke to the lpad.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->removeSuccessor(MBB);
+
+ // Restore FuncInfo back to its previous state and select the main landing
+ // pad block.
+ FuncInfo->MBB = MBB;
+ FuncInfo->InsertPt = MBB->end();
+ return;
+ }
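  // Resulting CFG shape (sketch): every invoke that used to branch straight
  // to the landing pad now reaches it through a per-clause block defining
  // the typeid register that feeds the selector PHI built above:
  //   invoke BBs -> clause0 BB \
  //                              -> main landing pad (PHI of typeid vregs)
  //   invoke BBs -> clause1 BB /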
+
// Mark exception register as live in.
- const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 0000000..1271f6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,679 @@
+//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+ "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+ "Maximum number of stack slots required for a singe statepoint");
+
+void
+StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+ // Consistency check
+ assert(PendingGCRelocateCalls.empty() &&
+ "Trying to visit statepoint before finished processing previous one");
+ Locations.clear();
+ RelocLocations.clear();
+ NextSlotToAllocate = 0;
+ // Need to resize this on each safepoint - we need the two to stay in
+ // sync and the clear patterns of a SelectionDAGBuilder have no relation
+ // to FunctionLoweringInfo.
+ AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+ for (size_t i = 0; i < AllocatedStackSlots.size(); i++) {
+ AllocatedStackSlots[i] = false;
+ }
+}
+void StatepointLoweringState::clear() {
+ Locations.clear();
+ RelocLocations.clear();
+ AllocatedStackSlots.clear();
+ assert(PendingGCRelocateCalls.empty() &&
+ "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+ SelectionDAGBuilder &Builder) {
+
+ NumSlotsAllocatedForStatepoints++;
+
+ // The basic scheme here is to first look for a previously created stack slot
+ // which is not in use (accounting for the fact arbitrary slots may already
+ // be reserved), or to create a new stack slot and use it.
+
+ // If this doesn't succeed in 40000 iterations, something is seriously wrong
+ for (int i = 0; i < 40000; i++) {
+ assert(Builder.FuncInfo.StatepointStackSlots.size() ==
+ AllocatedStackSlots.size() &&
+ "broken invariant");
+ const size_t NumSlots = AllocatedStackSlots.size();
+ assert(NextSlotToAllocate <= NumSlots && "broken invariant");
+
+ if (NextSlotToAllocate >= NumSlots) {
+ assert(NextSlotToAllocate == NumSlots);
+ // record stats
+ if (NumSlots + 1 > StatepointMaxSlotsRequired) {
+ StatepointMaxSlotsRequired = NumSlots + 1;
+ }
+
+ SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+ const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+ AllocatedStackSlots.push_back(true);
+ return SpillSlot;
+ }
+ if (!AllocatedStackSlots[NextSlotToAllocate]) {
+ const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+ AllocatedStackSlots[NextSlotToAllocate] = true;
+ return Builder.DAG.getFrameIndex(FI, ValueType);
+ }
+ // Note: We deliberately choose to advance this only on the failing path.
+ // Doing so on the succeeding path involves a bit of complexity that caused a
+ // minor bug previously. Unless performance shows this matters, please
+ // keep this code as simple as possible.
+ NextSlotToAllocate++;
+ }
+ llvm_unreachable("infinite loop?");
+}
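// A worked reuse example (hypothetical state): if an earlier statepoint
// left StatepointStackSlots == {fi#7, fi#9} and this statepoint already
// reserved fi#7 via reservePreviousStackSlotForValue, the loop above skips
// index 0 on its first pass and hands out fi#9 before ever calling
// CreateStackTemporary.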
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling. If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid a series of loads and stores that only serve to
+/// reshuffle values on the stack between calls.
+static void reservePreviousStackSlotForValue(SDValue Incoming,
+ SelectionDAGBuilder &Builder) {
+
+ if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+ // We won't need to spill this, so no need to check for previously
+ // allocated stack slots
+ return;
+ }
+
+ SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+ if (Loc.getNode()) {
+ // Duplicate in the input; a slot has already been reserved for this value.
+ return;
+ }
+
+ // Search back for the load from a stack slot pattern to find the original
+ // slot we allocated for this value. We could extend this to deal with
+ // simple modification patterns, but simply dealing with trivial load/store
+ // sequences already helps a lot.
+ if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
+ if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
+ const int Index = FI->getIndex();
+ auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+ Builder.FuncInfo.StatepointStackSlots.end(), Index);
+ if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
+ // not one of the lowering stack slots, can't reuse!
+ // TODO: Actually, we probably could reuse the stack slot if the value
+ // hasn't changed at all, but we'd need to look for intervening writes
+ return;
+ } else {
+ // This is one of our dedicated lowering slots
+ const int Offset =
+ std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+ if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+ // stack slot already assigned to someone else, can't use it!
+ // TODO: currently we reserve space for gc arguments after doing
+ // normal allocation for deopt arguments. We should reserve for
+ // _all_ deopt and gc arguments, then start allocating. This
+ // will prevent some moves being inserted when vm state changes,
+ // but gc state doesn't between two calls.
+ return;
+ }
+ // Reserve this stack slot
+ Builder.StatepointLowering.reserveStackSlot(Offset);
+ }
+
+ // Cache this slot so we find it when going through the normal
+ // assignment loop.
+ SDValue Loc =
+ Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+ }
+ }
+
+ // TODO: handle case where a reloaded value flows through a phi to
+ // another safepoint. e.g.
+ // bb1:
+ // a' = relocated...
+ // bb2: % pred: bb1, bb3, bb4, etc.
+ // a_phi = phi(a', ...)
+ // statepoint ... a_phi
+ // NOTE: This will require reasoning about cross basic block values. This is
+ // decidedly non trivial and this might not be the right place to do it. We
+ // don't really have the information we need here...
+
+ // TODO: handle simple updates. If a value is modified and the original
+ // value is no longer live, it would be nice to put the modified value in the
+ // same slot. This allows folding of the memory accesses for some
+ // instructions types (like an increment).
+ // statepoint (i)
+ // i1 = i+1
+ // statepoint (i1)
+}
+
+/// Remove any duplicates (as SDValues) from the derived pointer pairs. This
+/// is not required for correctness. Its purpose is to reduce the size of
+/// the StackMap section. It has no effect on the number of spill slots
+/// required or the actual lowering.
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
+ SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const Value *> &Relocs,
+ SelectionDAGBuilder &Builder) {
+
+ // This is horribly inefficient, but I don't care right now
+ SmallSet<SDValue, 64> Seen;
+
+ SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
+ for (size_t i = 0; i < Ptrs.size(); i++) {
+ SDValue SD = Builder.getValue(Ptrs[i]);
+ // Only add non-duplicates
+ if (Seen.count(SD) == 0) {
+ NewBases.push_back(Bases[i]);
+ NewPtrs.push_back(Ptrs[i]);
+ NewRelocs.push_back(Relocs[i]);
+ }
+ Seen.insert(SD);
+ }
+ assert(Bases.size() >= NewBases.size());
+ assert(Ptrs.size() >= NewPtrs.size());
+ assert(Relocs.size() >= NewRelocs.size());
+ Bases = NewBases;
+ Ptrs = NewPtrs;
+ Relocs = NewRelocs;
+ assert(Ptrs.size() == Bases.size());
+ assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract call from statepoint, lower it and return pointer to the
+/// call node. Also update NodeMap so that getValue(statepoint) will
+/// reference lowered call result
+static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+ SelectionDAGBuilder &Builder) {
+
+ ImmutableCallSite CS(StatepointSite.getCallSite());
+
+ // Lower the actual call itself - this is a bit of a hack, but we want to
+ // avoid modifying the actual lowering code. This is similar in intent to
+ // the LowerCallOperands mechanism used by PATCHPOINT, but is structured
+ // differently. Hopefully, this is slightly more robust w.r.t. calling
+ // convention, return values, and other function attributes.
+ Value *ActualCallee = const_cast<Value *>(StatepointSite.actualCallee());
+
+ std::vector<Value *> Args;
+ CallInst::const_op_iterator arg_begin = StatepointSite.call_args_begin();
+ CallInst::const_op_iterator arg_end = StatepointSite.call_args_end();
+ Args.insert(Args.end(), arg_begin, arg_end);
+ // TODO: remove the creation of a new instruction! We should not be
+ // modifying the IR (even temporarily) at this point.
+ CallInst *Tmp = CallInst::Create(ActualCallee, Args);
+ Tmp->setTailCall(CS.isTailCall());
+ Tmp->setCallingConv(CS.getCallingConv());
+ Tmp->setAttributes(CS.getAttributes());
+ Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+
+ // Handle the return value of the call, if any.
+ const bool HasDef = !Tmp->getType()->isVoidTy();
+ if (HasDef) {
+ // The value of the statepoint itself will be the value of the call itself.
+ // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ } else {
+ // The token value is never used from here on, just generate a poison value
+ Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
+ }
+ // Remove the fake entry we created so we don't have a hanging reference
+ // after we delete this node.
+ Builder.removeValue(Tmp);
+ delete Tmp;
+ Tmp = nullptr;
+
+ // Search for the call node
+ // The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ SDNode *CallNode = nullptr;
+
+ // We just emitted a call, so it should be the last thing generated
+ SDValue Chain = Builder.DAG.getRoot();
+
+ // Find closest CALLSEQ_END walking back through lowered nodes if needed
+ SDNode *CallEnd = Chain.getNode();
+ int Sanity = 0;
+ while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
+ CallEnd = CallEnd->getGluedNode();
+ assert(CallEnd && "Can not find call node");
+ assert(Sanity < 20 && "should have found call end already");
+ Sanity++;
+ }
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ assert(CallEnd->getGluedNode());
+
+ // Step back inside the CALLSEQ
+ CallNode = CallEnd->getGluedNode();
+ return CallNode;
+}
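+
+// For orientation, a sketch (not from the original code) of the DAG shape the
+// walk above assumes after LowerCallTo; the call opcode itself is target
+// specific, X86 is used purely as an example:
+//
+//   CALLSEQ_START -> ... -> X86ISD::CALL -> CALLSEQ_END
+//
+// Starting at the DAG root and following glue edges backwards finds
+// CALLSEQ_END; one more glue step lands on the call node that is returned.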
+
+/// Collect all gc pointers coming into a statepoint intrinsic, clean them up,
+/// and return three arrays:
+/// Bases - base pointers incoming to this statepoint
+/// Ptrs - derived pointers incoming to this statepoint
+/// Relocs - the gc_relocate corresponding to each base/ptr pair
+/// Elements of these arrays are in one-to-one correspondence with each
+/// other, i.e. Bases[i] and Ptrs[i] come from the same gc_relocate call.
+static void
+getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases,
+ SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const Value *> &Relocs,
+ ImmutableStatepoint StatepointSite,
+ SelectionDAGBuilder &Builder) {
+ for (GCRelocateOperands relocateOpers :
+ StatepointSite.getRelocates(StatepointSite)) {
+ Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
+ Bases.push_back(relocateOpers.basePtr());
+ Ptrs.push_back(relocateOpers.derivedPtr());
+ }
+
+ // Remove any redundant llvm::Values which map to the same SDValue as another
+ // input. Also has the effect of removing duplicates in the original
+ // llvm::Value input list as well. This is a useful optimization for
+ // reducing the size of the StackMap section. It has no other impact.
+ removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
+
+ assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+}
+
+/// Spill a value incoming to the statepoint. It might be part of either the
+/// vmstate or the gcstate. In both cases, unconditionally spill it on the
+/// stack unless it is a null constant. Return a pair whose first element is
+/// the frame index containing the saved value and whose second element is
+/// the outgoing chain from the emitted store.
+static std::pair<SDValue, SDValue>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+ SelectionDAGBuilder &Builder) {
+ SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+
+ // Emit new store if we didn't do it for this ptr before
+ if (!Loc.getNode()) {
+ Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+ Builder);
+ assert(isa<FrameIndexSDNode>(Loc));
+ int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
+ // We use TargetFrameIndex so that isel will not select it into LEA
+ Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+ // TODO: We can create TokenFactor node instead of
+ // chaining stores one after another, this may allow
+ // a bit more optimal scheduling for them
+ Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
+ MachinePointerInfo::getFixedStack(Index),
+ false, false, 0);
+
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+ }
+
+ assert(Loc.getNode());
+ return std::make_pair(Loc, Chain);
+}
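+
+// A minimal sketch of the nodes produced above for a freshly spilled value
+// (shapes assumed for illustration, not emitted verbatim):
+//
+//   Loc   = TargetFrameIndex<fi#N>
+//   Chain = store<fixed stack fi#N> PrevChain, Incoming, Loc
+//
+// A later call with the same Incoming finds the cached Loc and emits no
+// second store.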
+
+/// Lower a single value incoming to a statepoint node. This value can be
+/// either a deopt value or a gc value, the handling is the same. We special
+/// case constants and allocas, then fall back to spilling if required.
+static void lowerIncomingStatepointValue(SDValue Incoming,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ SDValue Chain = Builder.getRoot();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ // If the original value was a constant, make sure it gets recorded as
+ // such in the stackmap. This is required so that the consumer can
+ // parse the internal format of the deopt state. It also handles null
+ // pointers and other constant pointers in GC states.
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ } else {
+ // Otherwise, locate a spill slot and explicitly spill it so it
+ // can be found by the runtime later. We currently do not support
+ // tracking values through callee saved registers to their eventual
+ // spill location. This would be a useful optimization, but would
+ // need to be optional since it requires a lot of complexity on the
+ // runtime side which not all would support.
+ std::pair<SDValue, SDValue> Res =
+ spillIncomingStatepointValue(Incoming, Chain, Builder);
+ Ops.push_back(Res.first);
+ Chain = Res.second;
+ }
+
+ Builder.DAG.setRoot(Chain);
+}
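+
+// Sketch of the operands appended for each of the three cases above (the
+// concrete values are hypothetical):
+//
+//   constant 42:  TargetConstant<ConstantOp>, TargetConstant<42>
+//   alloca:       TargetFrameIndex<fi#N>
+//   other value:  TargetFrameIndex<fi#M>  (plus a spill store on the chain)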
+
+/// Lower deopt state and gc pointer arguments of the statepoint. The actual
+/// lowering is described in lowerIncomingStatepointValue. This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible. On
+/// completion, 'Ops' will contain ready-to-use operands for the machine code
+/// statepoint. The chain nodes will have already been created and the DAG root
+/// will be set to the last value spilled (if any were).
+static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+ ImmutableStatepoint StatepointSite,
+ SelectionDAGBuilder &Builder) {
+
+ // Lower the deopt and gc arguments for this statepoint. Layout will
+ // be: deopt argument length, deopt arguments..., gc arguments...
+
+ SmallVector<const Value *, 64> Bases, Ptrs, Relocations;
+ getIncomingStatepointGCValues(Bases, Ptrs, Relocations,
+ StatepointSite, Builder);
+
+#ifndef NDEBUG
+ // Check that each of the gc pointer and bases we've gotten out of the
+ // safepoint is something the strategy thinks might be a pointer into the GC
+ // heap. This is basically just here to help catch errors during statepoint
+ // insertion. TODO: This should actually be in the Verifier, but we can't get
+ // to the GCStrategy from there (yet).
+ if (Builder.GFI) {
+ GCStrategy &S = Builder.GFI->getStrategy();
+ for (const Value *V : Bases) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
+ }
+ }
+ for (const Value *V : Ptrs) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
+ }
+ }
+ for (const Value *V : Relocations) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() && "non gc managed pointer relocated");
+ }
+ }
+ }
+#endif
+
+ // Before we actually start lowering (and allocating spill slots for values),
+ // reserve any stack slots which we judge to be profitable to reuse for a
+ // particular value. This is purely an optimization over the code below and
+ // doesn't change semantics at all. It is important for performance that we
+ // reserve slots for both deopt and gc values before lowering either.
+ for (auto I = StatepointSite.vm_state_begin() + 1,
+ E = StatepointSite.vm_state_end();
+ I != E; ++I) {
+ Value *V = *I;
+ SDValue Incoming = Builder.getValue(V);
+ reservePreviousStackSlotForValue(Incoming, Builder);
+ }
+ for (unsigned i = 0; i < Bases.size() * 2; ++i) {
+ // Even elements will contain base, odd elements - derived ptr
+ const Value *V = i % 2 ? Ptrs[i / 2] : Bases[i / 2];
+ SDValue Incoming = Builder.getValue(V);
+ reservePreviousStackSlotForValue(Incoming, Builder);
+ }
+
+ // First, prefix the list with the number of unique values to be
+ // lowered. Note that this is the number of *Values* not the
+ // number of SDValues required to lower them.
+ const int NumVMSArgs = StatepointSite.numTotalVMSArgs();
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64));
+
+ assert(NumVMSArgs + 1 == std::distance(StatepointSite.vm_state_begin(),
+ StatepointSite.vm_state_end()));
+
+ // The vm state arguments are lowered in an opaque manner. We do
+ // not know what type of values are contained within. We skip the
+ // first one since that happens to be the total number we lowered
+ // explicitly just above. We could have left it in the loop and
+ // not done it explicitly, but it's far easier to understand this
+ // way.
+ for (auto I = StatepointSite.vm_state_begin() + 1,
+ E = StatepointSite.vm_state_end();
+ I != E; ++I) {
+ const Value *V = *I;
+ SDValue Incoming = Builder.getValue(V);
+ lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ }
+
+ // Finally, go ahead and lower all the gc arguments. There's no prefixed
+ // length for this one. After lowering, we'll have the base and pointer
+ // arrays interwoven with each (lowered) base pointer immediately followed by
+ // its (lowered) derived pointer, i.e.
+ // (base[0], ptr[0], base[1], ptr[1], ...)
+ for (unsigned i = 0; i < Bases.size() * 2; ++i) {
+ // Even elements will contain base, odd elements - derived ptr
+ const Value *V = i % 2 ? Ptrs[i / 2] : Bases[i / 2];
+ SDValue Incoming = Builder.getValue(V);
+ lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ }
+}
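+
+// Putting it together: for a statepoint with two deopt values (d0, d1) and
+// one base/derived pair (b0, p0), the meta operands built above are shaped
+// roughly as follows (illustrative only; lower(x) stands for whatever
+// lowerIncomingStatepointValue pushed for x):
+//
+//   ConstantOp, <number of vm state args>,
+//   lower(d0), lower(d1), lower(b0), lower(p0)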
+
+void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
+ // Check some preconditions for sanity
+ assert(isStatepoint(&CI) &&
+ "function called must be the statepoint function");
+
+ LowerStatepoint(ImmutableStatepoint(&CI));
+}
+
+void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+ // The basic scheme here is that information about both the original call and
+ // the safepoint is encoded in the CallInst. We create a temporary call and
+ // lower it, then reverse engineer the calling sequence.
+
+ NumOfStatepoints++;
+ // Clear state
+ StatepointLowering.startNewStatepoint(*this);
+
+ ImmutableCallSite CS(ISP.getCallSite());
+
+#ifndef NDEBUG
+ // Consistency check
+ for (const User *U : CS->users()) {
+ const CallInst *Call = cast<CallInst>(U);
+ if (isGCRelocate(Call))
+ StatepointLowering.scheduleRelocCall(*Call);
+ }
+#endif
+
+#ifndef NDEBUG
+ // If this is a malformed statepoint, report it early to simplify debugging.
+ // This should catch any IR level mistake that's made when constructing or
+ // transforming statepoints.
+ ISP.verify();
+
+ // Check that the associated GCStrategy expects to encounter statepoints.
+ // TODO: This if should become an assert. For now, we allow the GCStrategy
+ // to be optional for backwards compatibility. This will only last a short
+ // period (i.e. a couple of weeks).
+ if (GFI) {
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
+ }
+#endif
+
+ // Lower statepoint vmstate and gcstate arguments
+ SmallVector<SDValue, 10> LoweredArgs;
+ lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
+
+ // Get call node, we will replace it later with statepoint
+ SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+
+ // Construct the actual STATEPOINT node with all the appropriate arguments
+ // and return values.
+
+ // TODO: Currently, all of these operands are being marked as read/write in
+ // PrologEpilogInserter.cpp; we should special case the VMState arguments
+ // and flags to be read-only.
+ SmallVector<SDValue, 40> Ops;
+
+ // Calculate and push starting position of vmstate arguments
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ SDValue Glue;
+ if (CallNode->getGluedNode()) {
+ // Glue is always last operand
+ Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+ }
+ // Get number of arguments incoming directly into call node
+ unsigned NumCallRegArgs =
+ CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3);
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+ // Add call target
+ SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+ Ops.push_back(CallTarget);
+
+ // Add call arguments
+ // Get position of register mask in the call
+ SDNode::op_iterator RegMaskIt;
+ if (Glue.getNode())
+ RegMaskIt = CallNode->op_end() - 2;
+ else
+ RegMaskIt = CallNode->op_end() - 1;
+ Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+ // Add a leading constant argument with the Flags and the calling convention
+ // masked together
+ CallingConv::ID CallConv = CS.getCallingConv();
+ int Flags = cast<ConstantInt>(CS.getArgument(2))->getZExtValue();
+ assert(Flags == 0 && "not expected to be used");
+ Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64));
+
+ // Insert all vmstate and gcstate arguments
+ Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end());
+
+ // Add register mask from call node
+ Ops.push_back(*RegMaskIt);
+
+ // Add chain
+ Ops.push_back(CallNode->getOperand(0));
+
+ // Same for the glue, but we add it only if original call had it
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Compute return values. Provide a glue output since we consume one as
+ // input. This allows someone else to chain off us as needed.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT,
+ getCurSDLoc(), NodeTys, Ops);
+
+ // Replace original call
+ DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root
+ // Remove the original call node
+ DAG.DeleteNode(CallNode);
+
+ // DON'T set the root - under the assumption that it's already set past the
+ // inserted node we created.
+
+ // TODO: A better future implementation would be to emit a single variable
+ // argument, variable return value STATEPOINT node here and then hookup the
+ // return value of each gc.relocate to the respective output of the
+ // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
+ // to actually be possible today.
+}
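+
+// For reference, the operand list assembled above gives the STATEPOINT
+// machine node the rough shape (a sketch, not a normative definition):
+//
+//   STATEPOINT <num call args>, <call target>, <call args...>,
+//              ConstantOp, <flags | (cc << 1)>,
+//              <vm state and gc args...>, <regmask>, <chain>, [glue]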
+
+void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
+ // The result value of the gc_result is simply the result of the actual
+ // call. We've already emitted this, so just grab the value.
+ Instruction *I = cast<Instruction>(CI.getArgOperand(0));
+ assert(isStatepoint(I) &&
+ "first argument must be a statepoint token");
+
+ setValue(&CI, getValue(I));
+}
+
+void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
+#ifndef NDEBUG
+ // Consistency check
+ StatepointLowering.relocCallVisited(CI);
+#endif
+
+ GCRelocateOperands relocateOpers(&CI);
+ SDValue SD = getValue(relocateOpers.derivedPtr());
+
+ if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) {
+ // We didn't need to spill these special cases (constants and allocas).
+ // See the handling in spillIncomingStatepointValue for details.
+ setValue(&CI, SD);
+ return;
+ }
+
+ SDValue Loc = StatepointLowering.getRelocLocation(SD);
+ // Emit new load if we did not emit it before
+ if (!Loc.getNode()) {
+ SDValue SpillSlot = StatepointLowering.getLocation(SD);
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+
+ // Be conservative: flush all pending loads
+ // TODO: We can probably be less restrictive here;
+ // it may allow more scheduling opportunities
+ SDValue Chain = getRoot();
+
+ Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain,
+ SpillSlot, MachinePointerInfo::getFixedStack(FI), false,
+ false, false, 0);
+
+ StatepointLowering.setRelocLocation(SD, Loc);
+
+ // Again, be conservative, don't emit pending loads
+ DAG.setRoot(Loc.getValue(1));
+ }
+
+ assert(Loc.getNode());
+ setValue(&CI, Loc);
+}
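+
+// End to end, the IR sequence handled by the visitors above looks roughly
+// like the following (signatures abbreviated and names hypothetical; see the
+// statepoint documentation for the exact intrinsic forms):
+//
+//   %tok = call i32 @llvm.experimental.gc.statepoint(...)
+//   %ret = call i8 @llvm.experimental.gc.result(...)            ; uses %tok
+//   %rel = call i8 addrspace(1)* @llvm.experimental.gc.relocate(...)
+//
+// visitGCResult simply forwards the lowered call's value; visitGCRelocate
+// reloads the pointer from the spill slot recorded during lowering.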
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 0000000..673112c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,138 @@
+//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include <vector>
+
+namespace llvm {
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-selectiondag information.
+/// For each statepoint it tracks the locations of its gc values (incoming
+/// and relocated) and the list of gc_relocate calls scheduled for visiting
+/// (this is used for a debug-mode consistency check only). The spill slot
+/// tracking works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+ StatepointLoweringState() : NextSlotToAllocate(0) {
+ }
+
+ /// Reset all state tracking for a newly encountered safepoint. Also
+ /// performs some consistency checking.
+ void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+ /// Clear the memory usage of this object. This is called from
+ /// SelectionDAGBuilder::clear. We require that this is never called in the
+ /// midst of processing a statepoint sequence.
+ void clear();
+
+ /// Returns the spill location of a value incoming to the current
+ /// statepoint. Will return SDValue() if this value hasn't been
+ /// spilled. Otherwise, the value has already been spilled and no
+ /// further action is required by the caller.
+ SDValue getLocation(SDValue val) {
+ if (!Locations.count(val))
+ return SDValue();
+ return Locations[val];
+ }
+ void setLocation(SDValue val, SDValue Location) {
+ assert(!Locations.count(val) &&
+ "Trying to allocate already allocated location");
+ Locations[val] = Location;
+ }
+
+ /// Returns the relocated value for a given input pointer. Will
+ /// return SDValue() if this value hasn't yet been reloaded from
+ /// its stack slot after the statepoint. Otherwise, the value
+ /// has already been reloaded and the SDValue of that reload will
+ /// be returned. Note that VMState values are spilled but not
+ /// reloaded (since they don't change at the safepoint unless
+ /// also listed in the GC pointer section) and will thus never
+ /// be in this map.
+ SDValue getRelocLocation(SDValue val) {
+ if (!RelocLocations.count(val))
+ return SDValue();
+ return RelocLocations[val];
+ }
+ void setRelocLocation(SDValue val, SDValue Location) {
+ assert(!RelocLocations.count(val) &&
+ "Trying to allocate already allocated location");
+ RelocLocations[val] = Location;
+ }
+
+ /// Record the fact that we expect to encounter a given gc_relocate
+ /// before the next statepoint. If we don't see it, we'll report
+ /// an assertion.
+ void scheduleRelocCall(const CallInst &RelocCall) {
+ PendingGCRelocateCalls.push_back(&RelocCall);
+ }
+ /// Remove this gc_relocate from the list we're expecting to see
+ /// before the next statepoint. If we weren't expecting to see
+ /// it, we'll report an assertion.
+ void relocCallVisited(const CallInst &RelocCall) {
+ SmallVectorImpl<const CallInst *>::iterator itr =
+ std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
+ &RelocCall);
+ assert(itr != PendingGCRelocateCalls.end() &&
+ "Visited unexpected gcrelocate call");
+ PendingGCRelocateCalls.erase(itr);
+ }
+
+ // TODO: Should add consistency tracking to ensure we encounter
+ // expected gc_result calls too.
+
+ /// Get a stack slot we can use to store a value of type ValueType. This
+ /// will hopefully be a recycled slot from another statepoint.
+ SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+ void reserveStackSlot(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ assert(!AllocatedStackSlots[Offset] && "already reserved!");
+ assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+ AllocatedStackSlots[Offset] = true;
+ }
+ bool isStackSlotAllocated(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ return AllocatedStackSlots[Offset];
+ }
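+
+ // Expected use of the two helpers above during lowering (a sketch; SL and
+ // Off are hypothetical names):
+ //
+ //   if (!SL.isStackSlotAllocated(Off))
+ //     SL.reserveStackSlot(Off); // pin a slot found via StatepointStackSlots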
+
+private:
+ /// Maps a pre-relocation value (a gc pointer directly incoming into the
+ /// statepoint) to its location (currently only stack slots)
+ DenseMap<SDValue, SDValue> Locations;
+ /// Maps a pre-relocated value to its new relocated location
+ DenseMap<SDValue, SDValue> RelocLocations;
+
+ /// A boolean indicator for each slot listed in the FunctionInfo as to
+ /// whether it has been used in the current statepoint. Since we try to
+ /// preserve stack slots across safepoints, there can be gaps in which
+ /// slots have been allocated.
+ SmallVector<bool, 50> AllocatedStackSlots;
+
+ /// Points just beyond the last slot known to have been allocated
+ unsigned NextSlotToAllocate;
+
+ /// Keep track of pending gcrelocate calls for consistency check
+ SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9aef5ed..0a3c926 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == DemandedMask) {
+ if (MsbMask == NewMask) {
unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
- // Compute the correct shift amount type, which must be getShiftAmountTy
- // for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
- if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp, ShiftAmt));
+ unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ bool AlreadySignExtended =
+ TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ // However if the input is already sign extended we expect the sign
+ // extension to be dropped altogether later and do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp,
+ ShiftAmt));
+ }
}
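+
+ // An illustrative instance of the guarded fold above (not from the
+ // original change): for i32 (sign_extend_inreg x, i8) with only the MSB
+ // demanded, BitWidth = 32 and ShAmt = 8, so we emit (shl x, 24) to move
+ // bit 7 into the sign position - unless x already has >= 32 - 8 + 1 = 25
+ // sign bits, in which case the extension is expected to fold away.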
// Sign extension. Compute the demanded bits in the result that are not
@@ -1283,36 +1290,53 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// (zext x) == C --> x == (trunc C)
- if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // (sext x) == C --> x == (trunc C)
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ DCI.isBeforeLegalize() && N0->hasOneUse()) {
unsigned MinBits = N0.getValueSizeInBits();
- SDValue PreZExt;
+ SDValue PreExt;
+ bool Signed = false;
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
// ZExt
MinBits = N0->getOperand(0).getValueSizeInBits();
- PreZExt = N0->getOperand(0);
+ PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
- PreZExt = N0->getOperand(0);
+ PreExt = N0->getOperand(0);
}
+ } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
+ // SExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ Signed = true;
} else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
- // ZEXTLOAD
+ // ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
- PreZExt = N0;
+ PreExt = N0;
+ } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
+ Signed = true;
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
}
}
+ // Figure out how many bits we need to preserve this constant.
+ unsigned ReqdBits = Signed ?
+ C1.getBitWidth() - C1.getNumSignBits() + 1 :
+ C1.getActiveBits();
+
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
- MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) {
+ MinBits < C1.getBitWidth() &&
+ MinBits >= ReqdBits) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
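+
+ // An illustrative instance (not from the original change): for i8 %x,
+ // (setcc (sext %x to i32), -1, eq) becomes (setcc %x, trunc(-1), eq).
+ // The constant -1 needs only ReqdBits = 32 - 32 + 1 = 1 bit, so
+ // MinBits = 8 preserves it across the truncation.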
@@ -2163,9 +2187,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ const std::string &Constraint,
+ MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2177,8 +2202,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2231,8 +2254,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
/// and also tie in the associated operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
-TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
- ImmutableCallSite CS) const {
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
+ ImmutableCallSite CS) const {
/// ConstraintOperands - Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
@@ -2323,7 +2347,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
// If we have multiple alternative constraints, select the best alternative.
- if (ConstraintOperands.size()) {
+ if (!ConstraintOperands.empty()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
@@ -2394,12 +2418,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 0be00f0..b12e943 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -38,416 +38,18 @@ using namespace llvm;
#define DEBUG_TYPE "shadowstackgc"
namespace {
-
- class ShadowStackGC : public GCStrategy {
- /// RootChain - This is the global linked-list that contains the chain of GC
- /// roots.
- GlobalVariable *Head;
-
- /// StackEntryTy - Abstract type of a link in the shadow stack.
- ///
- StructType *StackEntryTy;
- StructType *FrameMapTy;
-
- /// Roots - GC roots in the current function. Each is a pair of the
- /// intrinsic call and its corresponding alloca.
- std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
-
- public:
- ShadowStackGC();
-
- bool initializeCustomLowering(Module &M) override;
- bool performCustomLowering(Function &F) override;
-
- private:
- bool IsNullValue(Value *V);
- Constant *GetFrameMap(Function &F);
- Type* GetConcreteStackEntryType(Function &F);
- void CollectRoots(Function &F);
- static GetElementPtrInst *CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx1, const char *Name);
- static GetElementPtrInst *CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx1, int Idx2, const char *Name);
- };
-
+class ShadowStackGC : public GCStrategy {
+public:
+ ShadowStackGC();
+};
}
static GCRegistry::Add<ShadowStackGC>
-X("shadow-stack", "Very portable GC for uncooperative code generators");
-
-namespace {
- /// EscapeEnumerator - This is a little algorithm to find all escape points
- /// from a function so that "finally"-style code can be inserted. In addition
- /// to finding the existing return and unwind instructions, it also (if
- /// necessary) transforms any call instructions into invokes and sends them to
- /// a landing pad.
- ///
- /// It's wrapped up in a state machine using the same transform C# uses for
- /// 'yield return' enumerators, This transform allows it to be non-allocating.
- class EscapeEnumerator {
- Function &F;
- const char *CleanupBBName;
-
- // State.
- int State;
- Function::iterator StateBB, StateE;
- IRBuilder<> Builder;
-
- public:
- EscapeEnumerator(Function &F, const char *N = "cleanup")
- : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
-
- IRBuilder<> *Next() {
- switch (State) {
- default:
- return nullptr;
-
- case 0:
- StateBB = F.begin();
- StateE = F.end();
- State = 1;
-
- case 1:
- // Find all 'return', 'resume', and 'unwind' instructions.
- while (StateBB != StateE) {
- BasicBlock *CurBB = StateBB++;
-
- // Branches and invokes do not escape, only unwind, resume, and return
- // do.
- TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
- continue;
-
- Builder.SetInsertPoint(TI->getParent(), TI);
- return &Builder;
- }
-
- State = 2;
-
- // Find all 'call' instructions.
- SmallVector<Instruction*,16> Calls;
- for (Function::iterator BB = F.begin(),
- E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(),
- EE = BB->end(); II != EE; ++II)
- if (CallInst *CI = dyn_cast<CallInst>(II))
- if (!CI->getCalledFunction() ||
- !CI->getCalledFunction()->getIntrinsicID())
- Calls.push_back(CI);
-
- if (Calls.empty())
- return nullptr;
+ X("shadow-stack", "Very portable GC for uncooperative code generators");
- // Create a cleanup block.
- LLVMContext &C = F.getContext();
- BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
- Type::getInt32Ty(C), nullptr);
- Constant *PersFn =
- F.getParent()->
- getOrInsertFunction("__gcc_personality_v0",
- FunctionType::get(Type::getInt32Ty(C), true));
- LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
- "cleanup.lpad",
- CleanupBB);
- LPad->setCleanup(true);
- ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+void llvm::linkShadowStackGC() {}
- // Transform the 'call' instructions into 'invoke's branching to the
- // cleanup block. Go in reverse order to make prettier BB names.
- SmallVector<Value*,16> Args;
- for (unsigned I = Calls.size(); I != 0; ) {
- CallInst *CI = cast<CallInst>(Calls[--I]);
-
- // Split the basic block containing the function call.
- BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB =
- CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
-
- // Remove the unconditional branch inserted at the end of CallBB.
- CallBB->getInstList().pop_back();
- NewBB->getInstList().remove(CI);
-
- // Create a new invoke instruction.
- Args.clear();
- CallSite CS(CI);
- Args.append(CS.arg_begin(), CS.arg_end());
-
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
- NewBB, CleanupBB,
- Args, CI->getName(), CallBB);
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
- CI->replaceAllUsesWith(II);
- delete CI;
- }
-
- Builder.SetInsertPoint(RI->getParent(), RI);
- return &Builder;
- }
- }
- };
-}
-
-// -----------------------------------------------------------------------------
-
-void llvm::linkShadowStackGC() { }
-
-ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) {
+ShadowStackGC::ShadowStackGC() {
InitRoots = true;
CustomRoots = true;
}
-
-Constant *ShadowStackGC::GetFrameMap(Function &F) {
- // doInitialization creates the abstract type of this value.
- Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
-
- // Truncate the ShadowStackDescriptor if some metadata is null.
- unsigned NumMeta = 0;
- SmallVector<Constant*, 16> Metadata;
- for (unsigned I = 0; I != Roots.size(); ++I) {
- Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
- if (!C->isNullValue())
- NumMeta = I + 1;
- Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
- }
- Metadata.resize(NumMeta);
-
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
-
- Constant *BaseElts[] = {
- ConstantInt::get(Int32Ty, Roots.size(), false),
- ConstantInt::get(Int32Ty, NumMeta, false),
- };
-
- Constant *DescriptorElts[] = {
- ConstantStruct::get(FrameMapTy, BaseElts),
- ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
- };
-
- Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
- StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
-
- Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
-
- // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
- // that, short of multithreaded LLVM, it should be safe; all that is
- // necessary is that a simple Module::iterator loop not be invalidated.
- // Appending to the GlobalVariable list is safe in that sense.
- //
- // All of the output passes emit globals last. The ExecutionEngine
- // explicitly supports adding globals to the module after
- // initialization.
- //
- // Still, if it isn't deemed acceptable, then this transformation needs
- // to be a ModulePass (which means it cannot be in the 'llc' pipeline
- // (which uses a FunctionPassManager (which segfaults (not asserts) if
- // provided a ModulePass))).
- Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
- GlobalVariable::InternalLinkage,
- FrameMap, "__gc_" + F.getName());
-
- Constant *GEPIndices[2] = {
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
- };
- return ConstantExpr::getGetElementPtr(GV, GEPIndices);
-}
-
-Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
- // doInitialization creates the generic version of this type.
- std::vector<Type*> EltTys;
- EltTys.push_back(StackEntryTy);
- for (size_t I = 0; I != Roots.size(); I++)
- EltTys.push_back(Roots[I].second->getAllocatedType());
-
- return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
-}
-
-/// doInitialization - If this module uses the GC intrinsics, find them now. If
-/// not, exit fast.
-bool ShadowStackGC::initializeCustomLowering(Module &M) {
- // struct FrameMap {
- // int32_t NumRoots; // Number of roots in stack frame.
- // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
- // void *Meta[]; // May be absent for roots without metadata.
- // };
- std::vector<Type*> EltTys;
- // 32 bits is ok up to a 32GB stack frame. :)
- EltTys.push_back(Type::getInt32Ty(M.getContext()));
- // Specifies length of variable length array.
- EltTys.push_back(Type::getInt32Ty(M.getContext()));
- FrameMapTy = StructType::create(EltTys, "gc_map");
- PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
-
- // struct StackEntry {
- // ShadowStackEntry *Next; // Caller's stack entry.
- // FrameMap *Map; // Pointer to constant FrameMap.
- // void *Roots[]; // Stack roots (in-place array, so we pretend).
- // };
-
- StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
-
- EltTys.clear();
- EltTys.push_back(PointerType::getUnqual(StackEntryTy));
- EltTys.push_back(FrameMapPtrTy);
- StackEntryTy->setBody(EltTys);
- PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
-
- // Get the root chain if it already exists.
- Head = M.getGlobalVariable("llvm_gc_root_chain");
- if (!Head) {
- // If the root chain does not exist, insert a new one with linkonce
- // linkage!
- Head = new GlobalVariable(M, StackEntryPtrTy, false,
- GlobalValue::LinkOnceAnyLinkage,
- Constant::getNullValue(StackEntryPtrTy),
- "llvm_gc_root_chain");
- } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
- Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
- Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
- }
-
- return true;
-}
-
-bool ShadowStackGC::IsNullValue(Value *V) {
- if (Constant *C = dyn_cast<Constant>(V))
- return C->isNullValue();
- return false;
-}
-
-void ShadowStackGC::CollectRoots(Function &F) {
- // FIXME: Account for original alignment. Could fragment the root array.
- // Approach 1: Null initialize empty slots at runtime. Yuck.
- // Approach 2: Emit a map of the array instead of just a count.
-
- assert(Roots.empty() && "Not cleaned up?");
-
- SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
- if (Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::gcroot) {
- std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
- CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
- if (IsNullValue(CI->getArgOperand(1)))
- Roots.push_back(Pair);
- else
- MetaRoots.push_back(Pair);
- }
-
- // Number roots with metadata (usually empty) at the beginning, so that the
- // FrameMap::Meta array can be elided.
- Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
-}
-
-GetElementPtrInst *
-ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
- int Idx, int Idx2, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx),
- ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
- Value* Val = B.CreateGEP(BasePtr, Indices, Name);
-
- assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
-
- return dyn_cast<GetElementPtrInst>(Val);
-}
-
-GetElementPtrInst *
-ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
- int Idx, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx) };
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
-
- assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
-
- return dyn_cast<GetElementPtrInst>(Val);
-}
-
-/// runOnFunction - Insert code to maintain the shadow stack.
-bool ShadowStackGC::performCustomLowering(Function &F) {
- LLVMContext &Context = F.getContext();
-
- // Find calls to llvm.gcroot.
- CollectRoots(F);
-
- // If there are no roots in this function, then there is no need to add a
- // stack map entry for it.
- if (Roots.empty())
- return false;
-
- // Build the constant map and figure the type of the shadow stack entry.
- Value *FrameMap = GetFrameMap(F);
- Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
-
- // Build the shadow stack entry at the very start of the function.
- BasicBlock::iterator IP = F.getEntryBlock().begin();
- IRBuilder<> AtEntry(IP->getParent(), IP);
-
- Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr,
- "gc_frame");
-
- while (isa<AllocaInst>(IP)) ++IP;
- AtEntry.SetInsertPoint(IP->getParent(), IP);
-
- // Initialize the map pointer and load the current head of the shadow stack.
- Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
- Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
- 0,1,"gc_frame.map");
- AtEntry.CreateStore(FrameMap, EntryMapPtr);
-
- // After all the allocas...
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- // For each root, find the corresponding slot in the aggregate...
- Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
-
- // And use it in lieu of the alloca.
- AllocaInst *OriginalAlloca = Roots[I].second;
- SlotPtr->takeName(OriginalAlloca);
- OriginalAlloca->replaceAllUsesWith(SlotPtr);
- }
-
- // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
- // really necessary (the collector would never see the intermediate state at
- // runtime), but it's nicer not to push the half-initialized entry onto the
- // shadow stack.
- while (isa<StoreInst>(IP)) ++IP;
- AtEntry.SetInsertPoint(IP->getParent(), IP);
-
- // Push the entry onto the shadow stack.
- Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
- StackEntry,0,0,"gc_frame.next");
- Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
- StackEntry, 0, "gc_newhead");
- AtEntry.CreateStore(CurrentHead, EntryNextPtr);
- AtEntry.CreateStore(NewHeadVal, Head);
-
- // For each instruction that escapes...
- EscapeEnumerator EE(F, "gc_cleanup");
- while (IRBuilder<> *AtExit = EE.Next()) {
- // Pop the entry from the shadow stack. Don't reuse CurrentHead from
- // AtEntry, since that would make the value live for the entire function.
- Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
- "gc_frame.next");
- Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
- AtExit->CreateStore(SavedHead, Head);
- }
-
- // Delete the original allocas (which are no longer used) and the intrinsic
- // calls (which are no longer valid). Doing this last avoids invalidating
- // iterators.
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- Roots[I].first->eraseFromParent();
- Roots[I].second->eraseFromParent();
- }
-
- Roots.clear();
- return true;
-}
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
new file mode 100644
index 0000000..f6393a5
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -0,0 +1,457 @@
+//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom lowering code required by the shadow-stack GC
+// strategy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "shadowstackgclowering"
+
+namespace {
+
+class ShadowStackGCLowering : public FunctionPass {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
+
+public:
+ static char ID;
+ ShadowStackGCLowering();
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type *GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Value *BasePtr, int Idx1,
+ const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Value *BasePtr, int Idx1, int Idx2,
+ const char *Name);
+};
+}
+
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+
+FunctionPass *llvm::createShadowStackGCLoweringPass() {
+ return new ShadowStackGCLowering();
+}
+
+char ShadowStackGCLowering::ID = 0;
+
+ShadowStackGCLowering::ShadowStackGCLowering()
+ : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
+ FrameMapTy(nullptr) {
+ initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
+}
+
+namespace {
+/// EscapeEnumerator - This is a little algorithm to find all escape points
+/// from a function so that "finally"-style code can be inserted. In addition
+/// to finding the existing return and unwind instructions, it also (if
+/// necessary) transforms any call instructions into invokes and sends them to
+/// a landing pad.
+///
+/// It's wrapped up in a state machine using the same transform C# uses for
+/// 'yield return' enumerators. This transform allows it to be non-allocating.
+class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return nullptr;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction *, 16> Calls;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
+ ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy =
+ StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+ Constant *PersFn = F.getParent()->getOrInsertFunction(
+ "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value *, 16> Args;
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI->getParent(), RI);
+ return &Builder;
+ }
+ }
+};
+}
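+
+// Typical use of EscapeEnumerator (a sketch mirroring how the lowering pass
+// uses it):
+//
+//   EscapeEnumerator EE(F, "gc_cleanup");
+//   while (IRBuilder<> *AtExit = EE.Next()) {
+//     // emit "finally"-style code at this escape point via AtExit
+//   }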
+
+Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant *, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
+
+ Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage, FrameMap,
+ "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices);
+}
+
+Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type *> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::create(EltTys, "gc_stackentry." + F.getName().str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGCLowering::doInitialization(Module &M) {
+ bool Active = false;
+ for (Function &F : M) {
+ if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
+ Active = true;
+ break;
+ }
+ }
+ if (!Active)
+ return false;
+
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
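+  //
+  // For example (illustrative only), a function with three roots where just
+  // the first carries metadata produces a constant shaped like:
+  //   { { i32 3, i32 1 }, [1 x i8*] [i8* bitcast (... to i8*)] }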
+ std::vector<Type *> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
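+  //
+  // Concretely, a frame with two i8* roots is laid out by
+  // GetConcreteStackEntryType as { gc_stackentry, i8*, i8* }.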
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(
+ M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGCLowering::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGCLowering::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
+ CI,
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2,
+ const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx)};
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
+bool ShadowStackGCLowering::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use the shadow stack GC.
+  if (!F.hasGC() || F.getGC() != std::string("shadow-stack"))
+ return false;
+
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry =
+ AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
+
+ while (isa<AllocaInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr =
+ CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr =
+ CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal =
+ CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 =
+ CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
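+
+// Illustrative usage sketch (not part of this pass): a frontend opts a
+// function into this lowering and registers a root roughly as follows,
+// using only existing IR APIs; F and the value names are hypothetical.
+//
+//   F->setGC("shadow-stack");
+//   IRBuilder<> B(&F->getEntryBlock(), F->getEntryBlock().begin());
+//   AllocaInst *Root = B.CreateAlloca(B.getInt8PtrTy(), nullptr, "root");
+//   Value *NoMeta = Constant::getNullValue(B.getInt8PtrTy());
+//   B.CreateCall2(Intrinsic::getDeclaration(F->getParent(),
+//                                           Intrinsic::gcroot),
+//                 Root, NoMeta);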
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 7fd8107..35e4292 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -191,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
unsigned Align =
TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index ea7b914..dab1dfe 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -120,10 +120,9 @@ void SplitAnalysis::analyzeUses() {
// First get all the defs from the interval values. This provides the correct
// slots for early clobbers.
- for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
- E = CurLI->vni_end(); I != E; ++I)
- if (!(*I)->isPHIDef() && !(*I)->isUnused())
- UseSlots.push_back((*I)->def);
+ for (const VNInfo *VNI : CurLI->valnos)
+ if (!VNI->isPHIDef() && !VNI->isUnused())
+ UseSlots.push_back(VNI->def);
// Get use slots from the use-def chain.
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -624,8 +623,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
AssignI.setMap(RegAssign);
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- VNInfo *VNI = Copies[i];
- SlotIndex Def = VNI->def;
+ SlotIndex Def = Copies[i]->def;
MachineInstr *MI = LIS.getInstructionFromIndex(Def);
assert(MI && "No instruction for back-copy");
@@ -636,13 +634,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
while (!AtBegin && (--MBBI)->isDebugValue());
DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
- LI->removeValNo(VNI);
+ LIS.removeVRegDefAt(*LI, Def);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
- // Adjust RegAssign if a register assignment is killed at VNI->def. We
- // want to avoid calculating the live range of the source register if
- // possible.
+ // Adjust RegAssign if a register assignment is killed at Def. We want to
+ // avoid calculating the live range of the source register if possible.
AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
@@ -727,9 +724,7 @@ void SplitEditor::hoistCopiesForSize() {
// Find the nearest common dominator for parent values with multiple
// back-copies. If a single back-copy dominates, put it in DomPair.second.
- for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
- VI != VE; ++VI) {
- VNInfo *VNI = *VI;
+ for (VNInfo *VNI : LI->valnos) {
if (VNI->isUnused())
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
@@ -802,9 +797,7 @@ void SplitEditor::hoistCopiesForSize() {
// Remove redundant back-copies that are now known to be dominated by another
// def with the same value.
SmallVector<VNInfo*, 8> BackCopies;
- for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
- VI != VE; ++VI) {
- VNInfo *VNI = *VI;
+ for (VNInfo *VNI : LI->valnos) {
if (VNI->isUnused())
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
@@ -823,16 +816,15 @@ void SplitEditor::hoistCopiesForSize() {
bool SplitEditor::transferValues() {
bool Skipped = false;
RegAssignMap::const_iterator AssignI = RegAssign.begin();
- for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
- ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
- DEBUG(dbgs() << " blit " << *ParentI << ':');
- VNInfo *ParentVNI = ParentI->valno;
+ for (const LiveRange::Segment &S : Edit->getParent()) {
+ DEBUG(dbgs() << " blit " << S << ':');
+ VNInfo *ParentVNI = S.valno;
// RegAssign has holes where RegIdx 0 should be used.
- SlotIndex Start = ParentI->start;
+ SlotIndex Start = S.start;
AssignI.advanceTo(Start);
do {
unsigned RegIdx;
- SlotIndex End = ParentI->end;
+ SlotIndex End = S.end;
if (!AssignI.valid()) {
RegIdx = 0;
} else if (AssignI.start() <= Start) {
@@ -917,7 +909,7 @@ bool SplitEditor::transferValues() {
++MBB;
}
Start = End;
- } while (Start != ParentI->end);
+ } while (Start != S.end);
DEBUG(dbgs() << '\n');
}
@@ -930,9 +922,7 @@ bool SplitEditor::transferValues() {
void SplitEditor::extendPHIKillRanges() {
// Extend live ranges to be live-out for successor PHI values.
- for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
- E = Edit->getParent().vni_end(); I != E; ++I) {
- const VNInfo *PHIVNI = *I;
+ for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
@@ -1006,12 +996,11 @@ void SplitEditor::deleteRematVictims() {
SmallVector<MachineInstr*, 8> Dead;
for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
LiveInterval *LI = &LIS.getInterval(*I);
- for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
- LII != LIE; ++LII) {
+ for (const LiveRange::Segment &S : LI->segments) {
// Dead defs end at the dead slot.
- if (LII->end != LII->valno->def.getDeadSlot())
+ if (S.end != S.valno->def.getDeadSlot())
continue;
- MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
MI->addRegisterDead(LI->reg, &TRI);
@@ -1036,9 +1025,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// the inserted copies.
// Add the original defs from the parent interval.
- for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
- E = Edit->getParent().vni_end(); I != E; ++I) {
- const VNInfo *ParentVNI = *I;
+ for (const VNInfo *ParentVNI : Edit->getParent().valnos) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index dcf1b44..faf94b6 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -463,7 +463,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (!VI.Var)
continue;
if (SlotRemap.count(VI.Slot)) {
- DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n");
+ DEBUG(dbgs() << "Remapping debug info for ["
+ << DIVariable(VI.Var).getName() << "].\n");
VI.Slot = SlotRemap[VI.Slot];
FixedDbg++;
}
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index c2ee87a..767f43a 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -123,5 +123,7 @@ uint32_t *StackMapLiveness::createRegisterMask() const {
for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end();
RI != RE; ++RI)
Mask[*RI / 32] |= 1U << (*RI % 32);
+
+ TRI->adjustStackMapLiveOutMask(Mask);
return Mask;
}
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index d3791c3..5d46419 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -84,8 +84,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits();
+ unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
@@ -241,7 +240,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
// entry.
const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
MCSymbolRefExpr::Create(MILabel, OutContext),
- MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
+ MCSymbolRefExpr::Create(AP.CurrentFnSymForSize, OutContext),
OutContext);
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
@@ -286,6 +285,18 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
}
#endif
}
+void StackMaps::recordStatepoint(const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::STATEPOINT &&
+ "expected statepoint");
+
+ StatepointOpers opers(&MI);
+  // Record all the deopt and gc operands (they're contiguous and run from the
+  // initial index to the end of the operand list).
+ const unsigned StartIdx = opers.getVarIdx();
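+  // Note: 0xABCDEF00 is a fixed placeholder ID; the statepoint's own ID
+  // operand is not consulted here.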
+ recordStackMapOpers(MI, 0xABCDEF00,
+ MI.operands_begin() + StartIdx, MI.operands_end(),
+ false);
+}
/// Emit the stackmap header.
///
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 45f97ac..d1fae95 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/Passes.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -86,10 +88,9 @@ bool StackProtector::runOnFunction(Function &Fn) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- TLI = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
- Attribute Attr = Fn.getAttributes().getAttribute(
- AttributeSet::FunctionIndex, "stack-protector-buffer-size");
+ Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
if (Attr.isStringAttribute() &&
Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
return false; // Invalid integer string
@@ -199,31 +200,24 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (F->hasFnAttribute(Attribute::StackProtectReq)) {
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
- } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (F->hasFnAttribute(Attribute::StackProtectStrong))
Strong = true;
- else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect))
+ else if (!F->hasFnAttribute(Attribute::StackProtect))
return false;
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- BasicBlock *BB = I;
-
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
- ++II) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
// SSP-Strong: Enable protectors for any call to alloca, regardless
// of size.
if (Strong)
return true;
- if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
@@ -335,7 +329,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
/// node.
static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
- const TargetLoweringBase *TLI, const Triple &Trip,
+ const TargetLoweringBase *TLI, const Triple &TT,
AllocaInst *&AI, Value *&StackGuardVar) {
bool SupportsSelectionDAGSP = false;
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
@@ -344,9 +338,10 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
Constant *OffsetVal =
ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
- StackGuardVar = ConstantExpr::getIntToPtr(
- OffsetVal, PointerType::get(PtrTy, AddressSpace));
- } else if (Trip.getOS() == llvm::Triple::OpenBSD) {
+ StackGuardVar =
+ ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy,
+ AddressSpace));
+ } else if (TT.isOSOpenBSD()) {
StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy);
cast<GlobalValue>(StackGuardVar)
->setVisibility(GlobalValue::HiddenVisibility);
@@ -399,14 +394,13 @@ bool StackProtector::InsertStackProtectors() {
InsertionPt = RI;
// At this point we know that BB has a return statement so it *DOES*
// have a terminator.
- assert(InsertionPt != nullptr && "BB must have a terminator instruction at "
- "this point.");
+ assert(InsertionPt != nullptr &&
+ "BB must have a terminator instruction at this point.");
}
Function *Intrinsic =
Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
-
} else {
// If we do not support SelectionDAG based tail calls, generate IR level
// tail calls.
@@ -459,11 +453,17 @@ bool StackProtector::InsertStackProtectors() {
LoadInst *LI1 = B.CreateLoad(StackGuardVar);
LoadInst *LI2 = B.CreateLoad(AI);
Value *Cmp = B.CreateICmpEQ(LI1, LI2);
- B.CreateCondBr(Cmp, NewBB, FailBB);
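+      // The guard comparison is expected to succeed essentially always, so
+      // bias the branch weights heavily toward the continuation block.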
+ unsigned SuccessWeight =
+ BranchProbabilityInfo::getBranchWeightStackProtector(true);
+ unsigned FailureWeight =
+ BranchProbabilityInfo::getBranchWeightStackProtector(false);
+ MDNode *Weights = MDBuilder(F->getContext())
+ .createBranchWeights(SuccessWeight, FailureWeight);
+ B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
}
}
- // Return if we didn't modify any basic blocks. I.e., there are no return
+ // Return if we didn't modify any basic blocks. i.e., there are no return
// statements in the function.
if (!HasPrologue)
return false;
@@ -477,15 +477,17 @@ BasicBlock *StackProtector::CreateFailBB() {
LLVMContext &Context = F->getContext();
BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
IRBuilder<> B(FailBB);
- if (Trip.getOS() == llvm::Triple::OpenBSD) {
- Constant *StackChkFail = M->getOrInsertFunction(
- "__stack_smash_handler", Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), nullptr);
+ if (Trip.isOSOpenBSD()) {
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_smash_handler",
+ Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context), nullptr);
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- Constant *StackChkFail = M->getOrInsertFunction(
- "__stack_chk_fail", Type::getVoidTy(Context), nullptr);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
+ nullptr);
B.CreateCall(StackChkFail);
}
B.CreateUnreachable();
diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp
new file mode 100644
index 0000000..95dfd75
--- /dev/null
+++ b/lib/CodeGen/StatepointExampleGC.cpp
@@ -0,0 +1,55 @@
+//===-- StatepointExampleGC.cpp - An example statepoint GC strategy --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a GCStrategy which serves as an example of a
+// statepoint-based lowering strategy. This GCStrategy is intended to be
+// suitable as a default implementation usable with any collector which can
+// consume the standard stackmap format generated by statepoints, uses the
+// default addrspace to distinguish between gc-managed and non-gc-managed
+// pointers, and has reasonable relocation semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+namespace {
+class StatepointGC : public GCStrategy {
+public:
+ StatepointGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific, we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Value *V) const override {
+ // Method is only valid on pointer typed values.
+ PointerType *PT = cast<PointerType>(V->getType());
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+    // updated and that no other pointer does. Note that addrspace(1) is used
+    // only as an example; it has no special meaning and is not reserved for
+    // GC usage.
+ return (1 == PT->getAddressSpace());
+ }
+};
+}
+
+static GCRegistry::Add<StatepointGC> X("statepoint-example",
+ "an example strategy for statepoint");
+
+namespace llvm {
+void linkStatepointExampleGC() {}
+}
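+
+// Illustrative usage (not part of this file): a client opts a function into
+// this strategy with F.setGC("statepoint-example"); a statically linked tool
+// can call llvm::linkStatepointExampleGC() to force this object file, and
+// hence the registry entry above, to be linked in.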
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 4377236..04b3992 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -560,8 +560,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 1557d10..e3f0191 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
FrameReg = RI->getFrameRegister(MF);
return getFrameIndexOffset(MF, FI);
}
+
+bool TargetFrameLowering::needsFrameIndexResolution(
+ const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasStackObjects();
+}
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index ab45f89..2566c1f 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -307,7 +308,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) {
+ if (!TM->getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -644,6 +645,28 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return true;
}
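+// Convention implemented below (a sketch, not normative): on a target whose
+// stack grows down, a call-frame-setup pseudo for N bytes yields +N and the
+// matching call-frame-destroy yields -N; any other instruction yields 0.
+// Stack-grows-up targets flip the signs.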
+int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ int FrameSetupOpcode = getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = getCallFrameDestroyOpcode();
+
+ if (MI->getOpcode() != FrameSetupOpcode &&
+ MI->getOpcode() != FrameDestroyOpcode)
+ return 0;
+
+ int SPAdj = MI->getOperand(0).getImm();
+
+ if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode))
+ SPAdj = -SPAdj;
+
+ return SPAdj;
+}
+
/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e833fd3..9048a44 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -414,7 +414,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SINCOS_PPCF128] = nullptr;
}
- if (TT.getOS() != Triple::OpenBSD) {
+ if (!TT.isOSOpenBSD()) {
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
} else {
// These are generally not available.
@@ -696,7 +696,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
- : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) {
+ : TM(tm), DL(TM.getDataLayout()) {
initActions();
// Perform these initializations only once.
@@ -710,10 +710,12 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
IntDivIsCheap = false;
+ FsqrtIsCheap = false;
Pow2SDivIsCheap = false;
JumpIsExpensive = false;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
+ EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
@@ -747,37 +749,33 @@ void TargetLoweringBase::initActions() {
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
// Set default actions for various operations.
- for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.
for (unsigned IM = (unsigned)ISD::PRE_INC;
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
- setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
- setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
}
// Most backends expect to see the node which just returns the value loaded.
- setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
- (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
// These operations default to expand.
- setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMINNUM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMAXNUM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FGETSIGN, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMAD, VT, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FROUND, VT, Expand);
// These operations default to expand for vector types.
- if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
- VT <= MVT::LAST_VECTOR_VALUETYPE) {
- setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
- (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
- (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
- (MVT::SimpleValueType)VT, Expand);
+ if (VT.isVector()) {
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
}
@@ -897,6 +895,138 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
}
}
+TargetLoweringBase::LegalizeKind
+TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
+ // If this is a simple type, use the ComputeRegisterProp mechanism.
+ if (VT.isSimple()) {
+ MVT SVT = VT.getSimpleVT();
+ assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+ MVT NVT = TransformToType[SVT.SimpleTy];
+ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
+
+ assert((LA == TypeLegal || LA == TypeSoftenFloat ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
+ "Promote may not follow Expand or Promote");
+
+ if (LA == TypeSplitVector)
+ return LegalizeKind(LA,
+ EVT::getVectorVT(Context, SVT.getVectorElementType(),
+ SVT.getVectorNumElements() / 2));
+ if (LA == TypeScalarizeVector)
+ return LegalizeKind(LA, SVT.getVectorElementType());
+ return LegalizeKind(LA, NVT);
+ }
+
+ // Handle Extended Scalar Types.
+ if (!VT.isVector()) {
+ assert(VT.isInteger() && "Float types must be simple");
+ unsigned BitSize = VT.getSizeInBits();
+ // First promote to a power-of-two size, then expand if necessary.
+ if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
+ EVT NVT = VT.getRoundIntegerType(Context);
+ assert(NVT != VT && "Unable to round integer VT");
+ LegalizeKind NextStep = getTypeConversion(Context, NVT);
+ // Avoid multi-step promotion.
+ if (NextStep.first == TypePromoteInteger)
+ return NextStep;
+ // Return rounded integer type.
+ return LegalizeKind(TypePromoteInteger, NVT);
+ }
+
+ return LegalizeKind(TypeExpandInteger,
+ EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
+ }
+
+ // Handle vector types.
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ // Vectors with only one element are always scalarized.
+ if (NumElts == 1)
+ return LegalizeKind(TypeScalarizeVector, EltVT);
+
+ // Try to widen vector elements until the element type is a power of two and
+ // promote it to a legal type later on, for example:
+ // <3 x i8> -> <4 x i8> -> <4 x i32>
+ if (EltVT.isInteger()) {
+ // Vectors with a number of elements that is not a power of two are always
+ // widened, for example <3 x i8> -> <4 x i8>.
+ if (!VT.isPow2VectorType()) {
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+ EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Examine the element type.
+ LegalizeKind LK = getTypeConversion(Context, EltVT);
+
+ // If type is to be expanded, split the vector.
+ // <4 x i140> -> <2 x i140>
+ if (LK.first == TypeExpandInteger)
+ return LegalizeKind(TypeSplitVector,
+ EVT::getVectorVT(Context, EltVT, NumElts / 2));
+
+ // Promote the integer element types until a legal vector type is found
+ // or until the element integer type is too big. If a legal type was not
+ // found, fallback to the usual mechanism of widening/splitting the
+ // vector.
+ EVT OldEltVT = EltVT;
+ while (1) {
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
+ .getRoundIntegerType(Context);
+
+ // Stop trying when getting a non-simple element type.
+ // Note that vector elements may be greater than legal vector element
+ // types. Example: X86 XMM registers hold 64bit element on 32bit
+ // systems.
+ if (!EltVT.isSimple())
+ break;
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ // Found a legal promoted vector type.
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+ return LegalizeKind(TypePromoteInteger,
+ EVT::getVectorVT(Context, EltVT, NumElts));
+ }
+
+ // Reset the type to the unexpanded type if we did not find a legal vector
+ // type with a promoted vector element type.
+ EltVT = OldEltVT;
+ }
+
+ // Try to widen the vector until a legal type is found.
+ // If there is no wider legal type, split the vector.
+ while (1) {
+ // Round up to the next power of 2.
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+
+ // If there is no simple vector type with this many elements then there
+ // cannot be a larger legal vector type. Note that this assumes that
+ // there are no skipped intermediate vector types in the simple types.
+ if (!EltVT.isSimple())
+ break;
+ MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ if (LargerVector == MVT())
+ break;
+
+ // If this type is legal then widen the vector.
+ if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
+ return LegalizeKind(TypeWidenVector, LargerVector);
+ }
+
+ // Widen odd vectors to next power of two.
+ if (!VT.isPow2VectorType()) {
+ EVT NVT = VT.getPow2VectorType(Context);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Vectors with illegal element types are expanded.
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
+ return LegalizeKind(TypeSplitVector, NVT);
+}
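+
+// Worked example (illustrative): with i64 as the widest legal integer type,
+// i33 is TypePromoteInteger to i64, i256 is TypeExpandInteger into i128
+// halves, and <3 x i8> is TypeWidenVector to <4 x i8>, whose element type
+// may then be promoted in a later step.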
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned &NumIntermediates,
@@ -992,10 +1122,15 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Add a new memory operand for this FI.
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
+
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+ Flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOVolatile;
+ }
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
- TM.getSubtargetImpl()->getDataLayout()->getPointerSize(),
- MFI.getObjectAlignment(FI));
+ MachinePointerInfo::getFixedStack(FI), Flags,
+ TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -1009,10 +1144,9 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
-std::pair<const TargetRegisterClass*, uint8_t>
-TargetLoweringBase::findRepresentativeClass(MVT VT) const {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+std::pair<const TargetRegisterClass *, uint8_t>
+TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
@@ -1038,7 +1172,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const {
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
-void TargetLoweringBase::computeRegisterProperties() {
+void TargetLoweringBase::computeRegisterProperties(
+ const TargetRegisterInfo *TRI) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
@@ -1220,7 +1355,7 @@ void TargetLoweringBase::computeRegisterProperties() {
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
const TargetRegisterClass* RRC;
uint8_t Cost;
- std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
RepRegClassForVT[i] = RRC;
RepRegClassCostForVT[i] = Cost;
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index efd15e1..c1b34f7 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -71,12 +71,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
const MCSection *Sec = getContext().getELFSection(NameData,
ELF::SHT_PROGBITS,
Flags,
- SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ unsigned Size = TM.getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(
- TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -166,9 +164,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
return ELF::SHT_PROGBITS;
}
-
-static unsigned
-getELFSectionFlags(SectionKind K) {
+static unsigned getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
if (!K.isMetadata())
@@ -183,9 +179,7 @@ getELFSectionFlags(SectionKind K) {
if (K.isThreadLocal())
Flags |= ELF::SHF_TLS;
- // K.isMergeableConst() is left out to honour PR4650
- if (K.isMergeableCString() || K.isMergeableConst4() ||
- K.isMergeableConst8() || K.isMergeableConst16())
+ if (K.isMergeableCString() || K.isMergeableConst())
Flags |= ELF::SHF_MERGE;
if (K.isMergeableCString())
@@ -222,120 +216,121 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
}
return getContext().getELFSection(SectionName,
getELFSectionType(SectionName, Kind), Flags,
- Kind, /*EntrySize=*/0, Group);
+ /*EntrySize=*/0, Group);
}
-/// getSectionPrefixForGlobal - Return the section prefix name used by options
-/// FunctionsSections and DataSections.
+/// Return the section prefix name used by options FunctionSections and
+/// DataSections.
static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
- if (Kind.isText()) return ".text.";
- if (Kind.isReadOnly()) return ".rodata.";
- if (Kind.isBSS()) return ".bss.";
-
- if (Kind.isThreadData()) return ".tdata.";
- if (Kind.isThreadBSS()) return ".tbss.";
-
- if (Kind.isDataNoRel()) return ".data.";
- if (Kind.isDataRelLocal()) return ".data.rel.local.";
- if (Kind.isDataRel()) return ".data.rel.";
- if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
-
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isReadOnly())
+ return ".rodata";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadData())
+ return ".tdata";
+ if (Kind.isThreadBSS())
+ return ".tbss";
+ if (Kind.isDataNoRel())
+ return ".data";
+ if (Kind.isDataRelLocal())
+ return ".data.rel.local";
+ if (Kind.isDataRel())
+ return ".data.rel";
+ if (Kind.isReadOnlyWithRelLocal())
+ return ".data.rel.ro.local";
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".data.rel.ro.";
+ return ".data.rel.ro";
}
const MCSection *TargetLoweringObjectFileELF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
// If we have -ffunction-section or -fdata-section then we should emit the
// global value to a uniqued section specifically for it.
- bool EmitUniquedSection;
- if (Kind.isText())
- EmitUniquedSection = TM.getFunctionSections();
- else
- EmitUniquedSection = TM.getDataSections();
-
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
- !Kind.isCommon()) {
- StringRef Prefix = getSectionPrefixForGlobal(Kind);
-
- SmallString<128> Name(Prefix);
- TM.getNameWithPrefix(Name, GV, Mang, true);
-
- StringRef Group = "";
- unsigned Flags = getELFSectionFlags(Kind);
- if (GV->isWeakForLinker() || GV->hasComdat()) {
- if (const Comdat *C = getELFComdat(GV))
- Group = C->getName();
- else
- Group = Name.substr(Prefix.size());
- Flags |= ELF::SHF_GROUP;
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
+
+ unsigned EntrySize = 0;
+ if (Kind.isMergeableCString()) {
+ if (Kind.isMergeable2ByteCString()) {
+ EntrySize = 2;
+ } else if (Kind.isMergeable4ByteCString()) {
+ EntrySize = 4;
+ } else {
+ EntrySize = 1;
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+ }
+ } else if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4()) {
+ EntrySize = 4;
+ } else if (Kind.isMergeableConst8()) {
+ EntrySize = 8;
+ } else {
+ assert(Kind.isMergeableConst16() && "unknown data width");
+ EntrySize = 16;
}
-
- return getContext().getELFSection(Name.str(),
- getELFSectionType(Name.str(), Kind),
- Flags, Kind, 0, Group);
}
- if (Kind.isText()) return TextSection;
-
- if (Kind.isMergeable1ByteCString() ||
- Kind.isMergeable2ByteCString() ||
- Kind.isMergeable4ByteCString()) {
+ StringRef Group = "";
+ if (const Comdat *C = getELFComdat(GV)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ }
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name;
+ if (Kind.isMergeableCString()) {
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV));
-
- const char *SizeSpec = ".rodata.str1.";
- if (Kind.isMergeable2ByteCString())
- SizeSpec = ".rodata.str2.";
- else if (Kind.isMergeable4ByteCString())
- SizeSpec = ".rodata.str4.";
- else
- assert(Kind.isMergeable1ByteCString() && "unknown string width");
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
-
- std::string Name = SizeSpec + utostr(Align);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::SHF_MERGE |
- ELF::SHF_STRINGS,
- Kind);
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ Name = SizeSpec + utostr(Align);
+ } else if (Kind.isMergeableConst()) {
+ Name = ".rodata.cst";
+ Name += utostr(EntrySize);
+ } else {
+ Name = getSectionPrefixForGlobal(Kind);
}
- if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4() && MergeableConst4Section)
- return MergeableConst4Section;
- if (Kind.isMergeableConst8() && MergeableConst8Section)
- return MergeableConst8Section;
- if (Kind.isMergeableConst16() && MergeableConst16Section)
- return MergeableConst16Section;
- return ReadOnlySection; // .const
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GV, Mang, true);
}
+ return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group,
+ EmitUniqueSection && !UniqueSectionNames);
+}
+
+const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
- if (Kind.isReadOnly()) return ReadOnlySection;
-
- if (Kind.isThreadData()) return TLSDataSection;
- if (Kind.isThreadBSS()) return TLSBSSSection;
-
- // Note: we claim that common symbols are put in BSSSection, but they are
- // really emitted with the magic .comm directive, which creates a symbol table
- // entry but not a section.
- if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
-
- if (Kind.isDataNoRel()) return DataSection;
- if (Kind.isDataRelLocal()) return DataRelLocalSection;
- if (Kind.isDataRel()) return DataRelSection;
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM);
+}
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return DataRelROSection;
+bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
}
/// getSectionForConstant - Given a mergeable constant with the
@@ -366,7 +361,6 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
std::string Name;
unsigned Type;
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
- SectionKind Kind = SectionKind::getDataRel();
StringRef COMDAT = KeySym ? KeySym->getName() : "";
if (KeySym)
@@ -398,7 +392,7 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
Type = ELF::SHT_PROGBITS;
}
- return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT);
+ return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT);
}
const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
@@ -419,16 +413,10 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
if (!UseInitArray)
return;
- StaticCtorSection =
- getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ StaticCtorSection = getContext().getELFSection(
+ ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = getContext().getELFSection(
+ ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
//===----------------------------------------------------------------------===//
@@ -464,14 +452,15 @@ emitModuleFlags(MCStreamer &Streamer,
continue;
StringRef Key = MFE.Key->getString();
- Value *Val = MFE.Val;
+ Metadata *Val = MFE.Val;
if (Key == "Objective-C Image Info Version") {
- VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue();
} else if (Key == "Objective-C Garbage Collection" ||
Key == "Objective-C GC Only" ||
- Key == "Objective-C Is Simulated") {
- ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ Key == "Objective-C Is Simulated" ||
+ Key == "Objective-C Image Swift Version") {
+ ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue();
} else if (Key == "Objective-C Image Info Section") {
SectionVal = cast<MDString>(Val)->getString();
} else if (Key == "Linker Options") {
@@ -572,60 +561,6 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
return S;
}
-bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
- const MCSection &Section) const {
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
-
- // Sections holding 1 byte strings are atomized based on the data
- // they contain.
- // Sections holding 2 byte strings require symbols in order to be
- // atomized.
- // There is no dedicated section for 4 byte strings.
- if (SMO.getKind().isMergeable1ByteCString())
- return false;
-
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_classname" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
- return false;
-
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_methname" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
- return false;
-
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_methtype" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
- return false;
-
- if (SMO.getSegmentName() == "__DATA" &&
- SMO.getSectionName() == "__cfstring")
- return false;
-
- // no_dead_strip sections are not atomized in practice.
- if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
- return false;
-
- switch (SMO.getType()) {
- default:
- return true;
-
- // These sections are atomized at the element boundaries without using
- // symbols.
- case MachO::S_4BYTE_LITERALS:
- case MachO::S_8BYTE_LITERALS:
- case MachO::S_16BYTE_LITERALS:
- case MachO::S_LITERAL_POINTERS:
- case MachO::S_NON_LAZY_SYMBOL_POINTERS:
- case MachO::S_LAZY_SYMBOL_POINTERS:
- case MachO::S_MOD_INIT_FUNC_POINTERS:
- case MachO::S_MOD_TERM_FUNC_POINTERS:
- case MachO::S_INTERPOSING:
- return false;
- }
-}
-
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
@@ -648,16 +583,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
@@ -854,7 +787,7 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
unsigned Characteristics = getCOFFSectionFlags(Kind);
StringRef Name = GV->getSection();
StringRef COMDATSymName = "";
- if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+ if (GV->hasComdat()) {
Selection = getSelectionForCOFF(GV);
const GlobalValue *ComdatGV;
if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
@@ -901,12 +834,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
else
EmitUniquedSection = TM.getDataSections();
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- // Section names depend on the name of the symbol which is not feasible if the
- // symbol has private linkage.
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
- !Kind.isCommon()) {
+ if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind);
@@ -965,7 +893,7 @@ emitModuleFlags(MCStreamer &Streamer,
i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
const Module::ModuleFlagEntry &MFE = *i;
StringRef Key = MFE.Key->getString();
- Value *Val = MFE.Val;
+ Metadata *Val = MFE.Val;
if (Key == "Linker Options") {
LinkerOptions = cast<MDNode>(Val);
break;
@@ -982,21 +910,10 @@ emitModuleFlags(MCStreamer &Streamer,
MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
- StringRef Op = MDOption->getString();
// Lead with a space for consistency with our dllexport implementation.
- std::string Escaped(" ");
- if (Op.find(" ") != StringRef::npos) {
- // The PE-COFF spec says args with spaces must be quoted. It doesn't say
- // how to escape quotes, but it probably uses this algorithm:
- // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx
- // FIXME: Reuse escaping code from Support/Windows/Program.inc
- Escaped.push_back('\"');
- Escaped.append(Op);
- Escaped.push_back('\"');
- } else {
- Escaped.append(Op);
- }
- Streamer.EmitBytes(Escaped);
+ std::string Directive(" ");
+ Directive.append(MDOption->getString());
+ Streamer.EmitBytes(Directive);
}
}
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index e218a83..1bbe6e1 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1515,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
MRI = &MF->getRegInfo();
- TII = TM.getSubtargetImpl()->getInstrInfo();
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- InstrItins = TM.getSubtargetImpl()->getInstrItineraryData();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 7824f92..d393e10 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -88,7 +88,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
DeadBlocks[i]->eraseFromParent();
}
- return DeadBlocks.size();
+ return !DeadBlocks.empty();
}
@@ -204,5 +204,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
F.RenumberBlocks();
- return (DeadBlocks.size() || ModifiedPHI);
+ return (!DeadBlocks.empty() || ModifiedPHI);
}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 0d17d43..7d3b0ce 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -252,20 +252,41 @@ void VirtRegRewriter::addMBBLiveIns() {
unsigned PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
- // Scan the segments of LI.
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
- ++I) {
- if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
- continue;
- for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
- if (!LiveIn[i]->isLiveIn(PhysReg))
- LiveIn[i]->addLiveIn(PhysReg);
- LiveIn.clear();
+ if (LI.hasSubRanges()) {
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ for (const auto &Seg : S.segments) {
+ if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
+ continue;
+ for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) {
+ unsigned SubReg = SR.getSubReg();
+ unsigned SubRegIndex = SR.getSubRegIndex();
+ unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
+ if ((SubRegLaneMask & S.LaneMask) == 0)
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
+ if (!LiveIn[i]->isLiveIn(SubReg))
+ LiveIn[i]->addLiveIn(SubReg);
+ }
+ }
+ LiveIn.clear();
+ }
+ }
+ } else {
+ // Scan the segments of LI.
+ for (const auto &Seg : LI.segments) {
+ if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
}
}
}
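
The lane-mask check in the subrange path above is the core of this change: a sub-register of the assigned physical register becomes a block live-in only when its lanes overlap the live subrange. A minimal sketch of that predicate, assuming the TargetRegisterInfo interface used in the hunk:

#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch: should the sub-register at SubRegIndex be marked live-in for a
// subrange whose live lanes are SubRangeLaneMask?
static bool coversLiveLanes(const TargetRegisterInfo *TRI,
                            unsigned SubRegIndex, unsigned SubRangeLaneMask) {
  // A sub-register index maps to a set of lanes; propagate live-in status
  // only when those lanes intersect the subrange's lanes.
  return (TRI->getSubRegIndexLaneMask(SubRegIndex) & SubRangeLaneMask) != 0;
}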
void VirtRegRewriter::rewrite() {
+ bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
@@ -347,7 +368,8 @@ void VirtRegRewriter::rewrite() {
// A virtual register kill refers to the whole register, so we may
// have to add <imp-use,kill> operands for the super-register. A
// partial redef always kills and redefines the super-register.
- if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ if (NoSubRegLiveness && MO.readsReg()
+ && (MO.isDef() || MO.isKill()))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -358,10 +380,12 @@ void VirtRegRewriter::rewrite() {
MO.setIsUndef(false);
// Also add implicit defs for the super-register.
- if (MO.isDead())
- SuperDeads.push_back(PhysReg);
- else
- SuperDefs.push_back(PhysReg);
+ if (NoSubRegLiveness) {
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
}
// PhysReg operands cannot have subregister indexes.
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
new file mode 100644
index 0000000..6f712a9
--- /dev/null
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -0,0 +1,626 @@
+//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers LLVM IR exception handling into something closer to what the
+// backend wants. It sniffs the personality function to see which kind of
+// preparation is necessary. If the personality function uses the Itanium LSDA,
+// this pass delegates to the DWARF EH preparation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "winehprepare"
+
+namespace {
+
+struct HandlerAllocas {
+ TinyPtrVector<AllocaInst *> Allocas;
+ int ParentFrameAllocationIndex;
+};
+
+// This map is used to model frame variable usage during outlining, to
+// construct a structure type to hold the frame variables in a frame
+// allocation block, and to remap the frame variable allocas (including
+// spill locations as needed) to GEPs that get the variable from the
+// frame allocation structure.
+typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+
+class WinEHPrepare : public FunctionPass {
+ std::unique_ptr<FunctionPass> DwarfPrepare;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ WinEHPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {}
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doFinalization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override {
+ return "Windows exception handling preparation";
+ }
+
+private:
+ bool prepareCPPEHHandlers(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads);
+ bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
+ LandingPadInst *LPad, CallInst *&EHAlloc,
+ AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo);
+};
+
+class WinEHFrameVariableMaterializer : public ValueMaterializer {
+public:
+ WinEHFrameVariableMaterializer(Function *OutlinedFn,
+ FrameVarInfoMap &FrameVarInfo);
+ ~WinEHFrameVariableMaterializer() {}
+
+ virtual Value *materializeValueFor(Value *V) override;
+
+private:
+ FrameVarInfoMap &FrameVarInfo;
+ IRBuilder<> Builder;
+};
+
+class WinEHCatchDirector : public CloningDirector {
+public:
+ WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector,
+ Value *EHObj, FrameVarInfoMap &VarInfo)
+ : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj),
+ Materializer(CatchFn, VarInfo),
+ SelectorIDType(Type::getInt32Ty(LPI->getContext())),
+ Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {}
+
+ CloningAction handleInstruction(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+
+ ValueMaterializer *getValueMaterializer() override { return &Materializer; }
+
+private:
+ LandingPadInst *LPI;
+ Value *CurrentSelector;
+ Value *EHObj;
+ WinEHFrameVariableMaterializer Materializer;
+ Type *SelectorIDType;
+ Type *Int8PtrType;
+
+ const Value *ExtractedEHPtr;
+ const Value *ExtractedSelector;
+ const Value *EHPtrStoreAddr;
+ const Value *SelectorStoreAddr;
+};
+} // end anonymous namespace
+
+char WinEHPrepare::ID = 0;
+INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
+ false, false)
+
+FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
+ return new WinEHPrepare(TM);
+}
+
+static bool isMSVCPersonality(EHPersonality Pers) {
+ return Pers == EHPersonality::MSVC_Win64SEH ||
+ Pers == EHPersonality::MSVC_CXX;
+}
+
+bool WinEHPrepare::runOnFunction(Function &Fn) {
+ SmallVector<LandingPadInst *, 4> LPads;
+ SmallVector<ResumeInst *, 4> Resumes;
+ for (BasicBlock &BB : Fn) {
+ if (auto *LP = BB.getLandingPadInst())
+ LPads.push_back(LP);
+ if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator()))
+ Resumes.push_back(Resume);
+ }
+
+ // No need to prepare functions that lack landing pads.
+ if (LPads.empty())
+ return false;
+
+ // Classify the personality to see what kind of preparation we need.
+ EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn());
+
+ // Delegate through to the DWARF pass if this is unrecognized.
+ if (!isMSVCPersonality(Pers))
+ return DwarfPrepare->runOnFunction(Fn);
+
+ // FIXME: This only returns true if the C++ EH handlers were outlined.
+ // When that code is complete, it should always return whatever
+ // prepareCPPEHHandlers returns.
+ if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads))
+ return true;
+
+ // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable.
+ if (Resumes.empty())
+ return false;
+
+ for (ResumeInst *Resume : Resumes) {
+ IRBuilder<>(Resume).CreateUnreachable();
+ Resume->eraseFromParent();
+ }
+
+ return true;
+}
+
+bool WinEHPrepare::doFinalization(Module &M) {
+ return DwarfPrepare->doFinalization(M);
+}
+
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ DwarfPrepare->getAnalysisUsage(AU);
+}
+
+bool WinEHPrepare::prepareCPPEHHandlers(
+ Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
+ // These containers are used to re-map frame variables that are used in
+ // outlined catch and cleanup handlers. They will be populated as the
+ // handlers are outlined.
+ FrameVarInfoMap FrameVarInfo;
+ SmallVector<CallInst *, 4> HandlerAllocs;
+ SmallVector<AllocaInst *, 4> HandlerEHObjPtrs;
+
+ bool HandlersOutlined = false;
+
+ for (LandingPadInst *LPad : LPads) {
+ // Look for evidence that this landingpad has already been processed.
+ bool LPadHasActionList = false;
+ BasicBlock *LPadBB = LPad->getParent();
+ for (Instruction &Inst : LPadBB->getInstList()) {
+ // FIXME: Make this an intrinsic.
+ if (auto *Call = dyn_cast<CallInst>(&Inst))
+ if (Call->getCalledFunction()->getName() == "llvm.eh.actions") {
+ LPadHasActionList = true;
+ break;
+ }
+ }
+
+ // If we've already outlined the handlers for this landingpad,
+ // there's nothing more to do here.
+ if (LPadHasActionList)
+ continue;
+
+ for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses;
+ ++Idx) {
+ if (LPad->isCatch(Idx)) {
+ // Create a new instance of the handler data structure in the
+ // HandlerData vector.
+ CallInst *EHAlloc = nullptr;
+ AllocaInst *EHObjPtr = nullptr;
+ bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad,
+ EHAlloc, EHObjPtr, FrameVarInfo);
+ if (Outlined) {
+ HandlersOutlined = true;
+ // These values must be resolved after all handlers have been
+ // outlined.
+ if (EHAlloc)
+ HandlerAllocs.push_back(EHAlloc);
+ if (EHObjPtr)
+ HandlerEHObjPtrs.push_back(EHObjPtr);
+ }
+ } // End if (isCatch)
+ } // End for each clause
+ } // End for each landingpad
+
+ // If nothing got outlined, there is no more processing to be done.
+ if (!HandlersOutlined)
+ return false;
+
+ // FIXME: We will replace the landingpad bodies with llvm.eh.actions calls
+ // and indirect branches here and then delete blocks which are no longer
+ // reachable. That will get rid of the handlers that we have outlined. There
+ // is code below that looks for allocas with no uses in the parent function.
+ // That will only happen after the pruning is implemented.
+
+ // Remap the frame variables.
+ SmallVector<Type *, 2> StructTys;
+ StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state
+ StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object
+
+ // Start the index at two since we always have the above fields at 0 and 1.
+ int Idx = 2;
+
+ // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment
+ // and add padding as necessary to provide the proper alignment.
+
+ // Map the alloca instructions to the corresponding index in the
+ // frame allocation structure. If any alloca is used only in a single
+ // handler and is not used in the parent frame after outlining, it will
+ // be assigned an index of -1, meaning the handler can keep its
+ // "temporary" alloca and the original alloca can be erased from the
+ // parent function. If we later encounter this alloca in a second
+ // handler, we will assign it a place in the frame allocation structure
+ // at that time. Since the instruction replacement doesn't happen until
+ // all the entries in the HandlerData have been processed, this isn't a
+ // problem.
+ for (auto &VarInfoEntry : FrameVarInfo) {
+ AllocaInst *ParentAlloca = VarInfoEntry.first;
+ HandlerAllocas &AllocaInfo = VarInfoEntry.second;
+
+ // If the instruction still has uses in the parent function or if it is
+ // referenced by more than one handler, add it to the frame allocation
+ // structure.
+ if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) {
+ Type *VarTy = ParentAlloca->getAllocatedType();
+ StructTys.push_back(VarTy);
+ AllocaInfo.ParentFrameAllocationIndex = Idx++;
+ } else {
+ // If the variable is not used in the parent frame and it is only used
+ // in one handler, the alloca can be removed from the parent frame
+ // and the handler will keep its "temporary" alloca to define the value.
+ // An element index of -1 is used to indicate this condition.
+ AllocaInfo.ParentFrameAllocationIndex = -1;
+ }
+ }
+
+ // Having filled the StructTys vector and assigned an index to each element,
+ // we can now create the structure.
+ StructType *EHDataStructTy = StructType::create(
+ F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata");
+ IRBuilder<> Builder(F.getParent()->getContext());
+
+ // Create a frame allocation.
+ Module *M = F.getParent();
+ LLVMContext &Context = M->getContext();
+ BasicBlock *Entry = &F.getEntryBlock();
+ Builder.SetInsertPoint(Entry->getFirstInsertionPt());
+ Function *FrameAllocFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameallocate);
+ uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy);
+ Value *FrameAllocArgs[] = {
+ ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)};
+ CallInst *FrameAlloc =
+ Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc");
+
+ Value *FrameEHData = Builder.CreateBitCast(
+ FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data");
+
+ // Now visit each handler that is using the structure and bitcast its EHAlloc
+ // value to be a pointer to the frame alloc structure.
+ DenseMap<Function *, Value *> EHDataMap;
+ for (CallInst *EHAlloc : HandlerAllocs) {
+ // The EHAlloc has no uses at this time, so we need to just insert the
+ // cast before the next instruction. There is always a next instruction.
+ BasicBlock::iterator II = EHAlloc;
+ ++II;
+ Builder.SetInsertPoint(cast<Instruction>(II));
+ Value *EHData = Builder.CreateBitCast(
+ EHAlloc, EHDataStructTy->getPointerTo(), "eh.data");
+ EHDataMap[EHAlloc->getParent()->getParent()] = EHData;
+ }
+
+ // Next, replace the place-holder EHObjPtr allocas with GEP instructions
+ // that pull the EHObjPtr from the frame alloc structure.
+ for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) {
+ Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()];
+ Builder.SetInsertPoint(EHObjPtr);
+ Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1);
+ EHObjPtr->replaceAllUsesWith(ElementPtr);
+ EHObjPtr->removeFromParent();
+ ElementPtr->takeName(EHObjPtr);
+ delete EHObjPtr;
+ }
+
+ // Finally, replace all of the temporary allocas for frame variables used in
+ // the outlined handlers and the original frame allocas with GEP instructions
+ // that get the equivalent pointer from the frame allocation struct.
+ for (auto &VarInfoEntry : FrameVarInfo) {
+ AllocaInst *ParentAlloca = VarInfoEntry.first;
+ HandlerAllocas &AllocaInfo = VarInfoEntry.second;
+ int Idx = AllocaInfo.ParentFrameAllocationIndex;
+
+ // If we have an index of -1 for this instruction, it means it isn't used
+ // outside of this handler. In that case, we just keep the "temporary"
+ // alloca in the handler and erase the original alloca from the parent.
+ if (Idx == -1) {
+ ParentAlloca->eraseFromParent();
+ } else {
+ // Otherwise, we replace the parent alloca and all outlined allocas
+ // which map to it with GEP instructions.
+
+ // First replace the original alloca.
+ Builder.SetInsertPoint(ParentAlloca);
+ Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc());
+ Value *ElementPtr =
+ Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx);
+ ParentAlloca->replaceAllUsesWith(ElementPtr);
+ ParentAlloca->removeFromParent();
+ ElementPtr->takeName(ParentAlloca);
+ delete ParentAlloca;
+
+ // Next replace all outlined allocas that are mapped to it.
+ for (AllocaInst *TempAlloca : AllocaInfo.Allocas) {
+ Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()];
+ // FIXME: Sink this GEP into the blocks where it is used.
+ Builder.SetInsertPoint(TempAlloca);
+ Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
+ ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx);
+ TempAlloca->replaceAllUsesWith(ElementPtr);
+ TempAlloca->removeFromParent();
+ ElementPtr->takeName(TempAlloca);
+ delete TempAlloca;
+ }
+ } // end else of if (Idx == -1)
+ } // End for each FrameVarInfo entry.
+
+ return HandlersOutlined;
+}
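
For concreteness, the frame-allocation struct assembled above always leads with the EH state and exception-object slots, followed by one slot per alloca that must be shared with a handler. A hypothetical C++ rendering for a parent function foo with two shared variables (the struct and field names are invented for illustration):

// Illustrative layout only; the real type is an LLVM StructType named
// "struct.foo.ehdata" built at compile time by the pass.
struct foo_ehdata {
  int EHState;       // index 0: EH state used by the runtime
  char *EHObjPtr;    // index 1: pointer to the exception object
  int SharedVarA;    // index 2: first shared parent alloca
  double SharedVarB; // index 3: second shared parent alloca
};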
+
+bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
+ LandingPadInst *LPad, CallInst *&EHAlloc,
+ AllocaInst *&EHObjPtr,
+ FrameVarInfoMap &VarInfo) {
+ Module *M = SrcFn->getParent();
+ LLVMContext &Context = M->getContext();
+
+ // Create a new function to receive the handler contents.
+ Type *Int8PtrType = Type::getInt8PtrTy(Context);
+ std::vector<Type *> ArgTys;
+ ArgTys.push_back(Int8PtrType);
+ ArgTys.push_back(Int8PtrType);
+ FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
+ Function *CatchHandler = Function::Create(
+ FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M);
+
+ // Generate a standard prolog to setup the frame recovery structure.
+ IRBuilder<> Builder(Context);
+ BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry");
+ CatchHandler->getBasicBlockList().push_front(Entry);
+ Builder.SetInsertPoint(Entry);
+ Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
+
+ // The outlined handler will be called with the parent's frame pointer as
+ // its second argument. To enable the handler to access variables from
+ // the parent frame, we use that pointer to locate a special block
+ // of memory that was allocated using llvm.frameallocate for this
+ // purpose. During the outlining process we will determine which frame
+ // variables are used in handlers and create a structure that maps these
+ // variables into the frame allocation block.
+ //
+ // The frame allocation block also contains an exception state variable
+ // used by the runtime and a pointer to the exception object pointer
+ // which will be filled in by the runtime for use in the handler.
+ Function *RecoverFrameFn =
+ Intrinsic::getDeclaration(M, Intrinsic::framerecover);
+ Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""),
+ &(CatchHandler->getArgumentList().back())};
+ EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc");
+
+ // This alloca is only temporary. We'll be replacing it once we know all the
+ // frame variables that need to go in the frame allocation structure.
+ EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr");
+
+ // This will give us a raw pointer to the exception object, which
+ // corresponds to the formal parameter of the catch statement. If the
+ // handler uses this object, we will generate code during the outlining
+ // process to cast the pointer to the appropriate type and dereference it
+ // as necessary. The un-outlined landing pad code represents the
+ // exception object as the result of the llvm.eh.begincatch call.
+ Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj");
+
+ ValueToValueMapTy VMap;
+
+ // FIXME: Map other values referenced in the filter handler.
+
+ WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo);
+
+ SmallVector<ReturnInst *, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+
+ BasicBlock::iterator II = LPad;
+
+ CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap,
+ /*ModuleLevelChanges=*/false, Returns, "",
+ &InlinedFunctionInfo,
+ SrcFn->getParent()->getDataLayout(), &Director);
+
+ // Move all the instructions in the first cloned block into our entry block.
+ BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry));
+ Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
+ FirstClonedBB->eraseFromParent();
+
+ return true;
+}
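
Every handler outlined above has the prototype i8* (i8*, i8*): the second argument is the parent's frame pointer, which feeds llvm.framerecover, and the returned i8* is the block address produced when llvm.eh.endcatch is rewritten into a return (see handleInstruction below). A hedged C-level sketch of that ABI, with invented parameter names:

// Sketch of the outlined handler's shape; the real function is created
// as LLVM IR by Function::Create in outlineCatchHandler above.
extern "C" char *catch_handler(char *Arg0,            // first i8* parameter
                               char *ParentFramePtr); // used by framerecover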
+
+CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Intercept instructions which extract values from the landing pad aggregate.
+ if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) {
+ if (Extract->getAggregateOperand() == LPI) {
+ assert(Extract->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) &&
+ "Unexpected operation: extracting an unknown landing pad element");
+
+ if (*(Extract->idx_begin()) == 0) {
+ // Element 0 doesn't directly correspond to anything in the WinEH
+ // scheme. It will be stored to a memory location, then later loaded,
+ // and finally the loaded value will be used as the argument to an
+ // llvm.eh.begincatch call. We're tracking it here so that we can skip
+ // the store and load.
+ ExtractedEHPtr = Inst;
+ } else {
+ // Element 1 corresponds to the filter selector. We'll map it to 1 for
+ // matching purposes, but it will also probably be stored to memory and
+ // reloaded, so we need to track the instruction so that we can map the
+ // loaded value too.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ ExtractedSelector = Inst;
+ }
+
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+ }
+ // Other extract value instructions just get cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(Inst)) {
+ // Look for and suppress stores of the extracted landingpad values.
+ const Value *StoredValue = Store->getValueOperand();
+ if (StoredValue == ExtractedEHPtr) {
+ EHPtrStoreAddr = Store->getPointerOperand();
+ return CloningDirector::SkipInstruction;
+ }
+ if (StoredValue == ExtractedSelector) {
+ SelectorStoreAddr = Store->getPointerOperand();
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Any other store just gets cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (auto *Load = dyn_cast<LoadInst>(Inst)) {
+ // Look for loads of (previously suppressed) landingpad values.
+ // The EHPtr load can be ignored (it should only be used as
+ // an argument to llvm.eh.begincatch), but the selector value
+ // needs to be mapped to a constant value of 1 to be used to
+ // simplify the branching to always flow to the current handler.
+ const Value *LoadAddr = Load->getPointerOperand();
+ if (LoadAddr == EHPtrStoreAddr) {
+ VMap[Inst] = UndefValue::get(Int8PtrType);
+ return CloningDirector::SkipInstruction;
+ }
+ if (LoadAddr == SelectorStoreAddr) {
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Any other loads just get cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) {
+ // The argument to the call is some form of the first element of the
+ // landingpad aggregate value, but that doesn't matter. It isn't used
+ // here.
+ // The return value of this instruction, however, is used to access the
+ // EH object pointer. We have generated an instruction to get that value
+ // from the EH alloc block, so we can just map to that here.
+ VMap[Inst] = EHObj;
+ return CloningDirector::SkipInstruction;
+ }
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ // It might be interesting to track whether or not we are inside a catch
+ // function, but that might make the algorithm more brittle than it needs
+ // to be.
+
+ // The end catch call can occur in one of two places: either in a
+ // landingpad block that is part of the catch handler's exception
+ // mechanism, or at the end of the catch block. If it occurs in a landing
+ // pad, we must skip it and continue so that the landing pad gets cloned.
+ // FIXME: This case isn't fully supported yet and shouldn't turn up in any
+ // of the test cases until it is.
+ if (IntrinCall->getParent()->isLandingPad())
+ return CloningDirector::SkipInstruction;
+
+ // If an end catch occurs anywhere else, the next instruction should be an
+ // unconditional branch instruction that we want to replace with a return
+ // to the address of the branch target.
+ const BasicBlock *EndCatchBB = IntrinCall->getParent();
+ const TerminatorInst *Terminator = EndCatchBB->getTerminator();
+ const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ assert(Branch && Branch->isUnconditional());
+ assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
+ BasicBlock::const_iterator(Branch));
+
+ ReturnInst::Create(NewBB->getContext(),
+ BlockAddress::get(Branch->getSuccessor(0)), NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+ }
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector)
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ else
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Continue with the default cloning behavior.
+ return CloningDirector::CloneInstruction;
+}
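
The remapping above is what collapses the cloned landing-pad dispatch: the reloaded selector and eh.typeid.for of the matched clause both become the constant 1, while any other clause becomes 0, so the clone's comparisons fold to constants. A scalar sketch of the effect (the values are the constants chosen by the director; nothing here is real API):

// Sketch: the constants substituted by WinEHCatchDirector make the
// clone's selector comparisons decidable during simplification.
int Selector = 1;      // reloaded selector value, remapped to 1
int ThisClauseID = 1;  // eh.typeid.for of the clause being outlined
int OtherClauseID = 0; // eh.typeid.for of any other clause
bool TakesThisHandler = (Selector == ThisClauseID); // always true
bool TakesOtherPath = (Selector == OtherClauseID);  // always false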
+
+WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
+ Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
+ : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
+ Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
+ // FIXME: Do something with the FrameVarMap so that it is shared across the
+ // function.
+}
+
+Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
+ // If we're asked to materialize an alloca variable, we temporarily
+ // create a matching alloca in the outlined function. When all the
+ // outlining is complete, we'll collect these into a structure and
+ // replace these temporary allocas with GEPs referencing the frame
+ // allocation block.
+ if (auto *AV = dyn_cast<AllocaInst>(V)) {
+ AllocaInst *NewAlloca = Builder.CreateAlloca(
+ AV->getAllocatedType(), AV->getArraySize(), AV->getName());
+ FrameVarInfo[AV].Allocas.push_back(NewAlloca);
+ return NewAlloca;
+ }
+
+// FIXME: Do PHI nodes need special handling?
+
+// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc.
+
+// FIXME: This doesn't work during cloning because it finds an instruction
+// in the use list that isn't yet part of a basic block.
+#if 0
+ // If we're asked to remap some other instruction, we'll need to
+ // spill it to an alloca variable in the parent function and add a
+ // temporary alloca in the outlined function to be processed as
+ // described above.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst) {
+ AllocaInst *Spill = DemoteRegToStack(*Inst, true);
+ AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(),
+ Spill->getArraySize());
+ FrameVarMap[Inst] = NewAlloca;
+ return NewAlloca;
+ }
+#endif
+
+ return nullptr;
+}