author     Pirama Arumuga Nainar <pirama@google.com>   2015-04-08 08:55:49 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>   2015-04-09 15:04:38 -0700
commit     4c5e43da7792f75567b693105cc53e3f1992ad98 (patch)
tree       1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib/CodeGen
parent     c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
Update aosp/master llvm for rebase to r233350
Change-Id: I07d935f8793ee8ec6b7da003f6483046594bca49
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/Analysis.cpp | 9
-rw-r--r--  lib/CodeGen/Android.mk | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 199
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 121
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 37
-rw-r--r--  lib/CodeGen/AsmPrinter/ByteStreamer.h | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 85
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 39
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 63
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 51
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 43
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 466
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 56
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 78
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 31
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 48
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.cpp | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.cpp | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 12
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 33
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 61
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 20
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 231
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 109
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 35
-rw-r--r--  lib/CodeGen/ForwardControlFlowIntegrity.cpp | 374
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 18
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 4
-rw-r--r--  lib/CodeGen/JumpInstrTables.cpp | 296
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 79
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 13
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 11
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 13
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 8
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 1
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 4
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 2
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 6
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 1
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 12
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 2
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 431
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 1
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 7
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 66
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 18
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 67
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 61
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 1
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 11
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 64
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 21
-rw-r--r--  lib/CodeGen/Passes.cpp | 50
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 6
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 60
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 78
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 1
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 83
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 7
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 170
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 2
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 107
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 901
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 154
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 101
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 76
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 735
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 45
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 79
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 9
-rw-r--r--  lib/CodeGen/ShadowStackGCLowering.cpp | 35
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 3
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 2
-rw-r--r--  lib/CodeGen/StackMapLivenessAnalysis.cpp | 64
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 186
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 12
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 45
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 120
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 155
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 63
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 1629
105 files changed, 4644 insertions, 4106 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e50b846..8e11fe1 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V,
// previous aggregate. Combine the two paths to obtain the true address of
// our element.
ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
- std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
- std::back_inserter(ValLoc));
+ ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
NoopInput = Op;
}
// Terminate if we couldn't find anything to look through.
@@ -601,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// The manipulations performed when we're looking through an insertvalue or
// an extractvalue would happen at the front of the RetPath list, so since
// we have to copy it anyway it's more efficient to create a reversed copy.
- using std::copy;
- SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
- copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
- copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+ SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
+ SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
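
Both Analysis.cpp hunks replace a std::copy through a std::back_inserter with direct range construction of the reversed sequence, which is shorter and lets the container size itself once. A minimal sketch of the idiom, with std::vector standing in for LLVM's SmallVector:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    int main() {
      std::vector<unsigned> Path = {1, 2, 3};

      // Old pattern: default-construct, then copy in reverse via an inserter.
      std::vector<unsigned> ViaCopy;
      std::copy(Path.rbegin(), Path.rend(), std::back_inserter(ViaCopy));

      // New pattern: build the reversed copy directly from the iterator range.
      std::vector<unsigned> Direct(Path.rbegin(), Path.rend());

      assert(ViaCopy == Direct && Direct.front() == 3);
      return 0;
    }
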
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index ec3cd77..2827d73 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -21,7 +21,6 @@ codegen_SRC_FILES := \
ExecutionDepsFix.cpp \
ExpandISelPseudos.cpp \
ExpandPostRAPseudos.cpp \
- ForwardControlFlowIntegrity.cpp \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCRootLowering.cpp \
@@ -31,7 +30,6 @@ codegen_SRC_FILES := \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
- JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
LiveDebugVariables.cpp \
@@ -53,6 +51,7 @@ codegen_SRC_FILES := \
MachineCombiner.cpp \
MachineCopyPropagation.cpp \
MachineCSE.cpp \
+ MachineDominanceFrontier.cpp \
MachineDominators.cpp \
MachineFunctionAnalysis.cpp \
MachineFunction.cpp \
@@ -66,6 +65,7 @@ codegen_SRC_FILES := \
MachineModuleInfoImpls.cpp \
MachinePassRegistry.cpp \
MachinePostDominators.cpp \
+ MachineRegionInfo.cpp \
MachineRegisterInfo.cpp \
MachineScheduler.cpp \
MachineSink.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 6fe75ad..9a16e15 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -36,8 +36,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-ARMException::ARMException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false) {}
+ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
ARMException::~ARMException() {}
@@ -53,13 +52,9 @@ void ARMException::endModule() {
Asm->OutStreamer.EmitCFISections(false, true);
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
// See if we need call frame info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
@@ -72,20 +67,12 @@ void ARMException::beginFunction(const MachineFunction *MF) {
/// endFunction - Gather and emit post-function exception information.
///
-void ARMException::endFunction(const MachineFunction *) {
- if (shouldEmitCFI)
- Asm->OutStreamer.EmitCFIEndProc();
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
+void ARMException::endFunction(const MachineFunction *MF) {
ARMTargetStreamer &ATS = getTargetStreamer();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
MMI->getLandingPads().empty())
ATS.emitCantUnwind();
else {
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
if (!MMI->getLandingPads().empty()) {
// Emit references to personality.
if (const Function *Personality = MMI->getPersonality()) {
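
The removed shouldEmitCFI member and EmitCFIEndProc call are presumably absorbed by the new DwarfCFIExceptionBase base class (its definition lies outside this diff), just as the eh_func_begin/end labels move into AsmPrinter itself. A rough sketch of that hoisting pattern, with all names beyond DwarfCFIExceptionBase hypothetical:

    #include <iostream>

    // Hypothetical sketch: the base class owns the shared flag and the common
    // end-of-function emission, so subclasses such as ARMException stop
    // duplicating it.
    struct DwarfCFIExceptionBaseSketch {
      bool shouldEmitCFI = false;
      void markFunctionEnd() {            // called once per function
        if (shouldEmitCFI)
          std::cout << ".cfi_endproc\n";  // stands in for EmitCFIEndProc()
      }
    };

    struct ARMExceptionSketch : DwarfCFIExceptionBaseSketch {
      void beginFunction() { shouldEmitCFI = true; }
    };

    int main() {
      ARMExceptionSketch EH;
      EH.beginFunction();
      EH.markFunctionEnd();  // prints .cfi_endproc
      return 0;
    }
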
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 988381d..07d6731 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -77,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- NumBits = TD.getPreferredAlignmentLog(GVar);
+ NumBits = DL.getPreferredAlignmentLog(GVar);
// If InBits is specified, round it to it.
if (InBits > NumBits)
@@ -103,12 +103,14 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()),
- LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
MF = nullptr;
- CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurrentFnBegin = nullptr;
+ CurrentFnEnd = nullptr;
GCMetadataPrinters = nullptr;
VerboseAsm = OutStreamer.isVerboseAsm();
}
@@ -219,9 +221,13 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
+ // We're at the module level. Construct MCSubtarget from the default CPU
+ // and target triple.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI);
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
@@ -525,7 +531,8 @@ void AsmPrinter::EmitFunctionHeader() {
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
- EmitAlignment(MF->getAlignment(), F);
+ if (MAI->hasFunctionAlignment())
+ EmitAlignment(MF->getAlignment(), F);
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
@@ -554,6 +561,17 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer.EmitLabel(DeadBlockSyms[i]);
}
+ if (CurrentFnBegin) {
+ if (MAI->useAssignmentForEHBegin()) {
+ MCSymbol *CurPos = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(CurPos);
+ OutStreamer.EmitAssignment(CurrentFnBegin,
+ MCSymbolRefExpr::Create(CurPos, OutContext));
+ } else {
+ OutStreamer.EmitLabel(CurrentFnBegin);
+ }
+ }
+
// Emit pre-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -764,6 +782,8 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
+ EmitFunctionHeader();
+
// Emit target-specific gunk before the function body.
EmitFunctionBodyStart();
@@ -867,32 +887,41 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function.
+ CurrentFnEnd = createTempSymbol("func_end");
+ OutStreamer.EmitLabel(CurrentFnEnd);
+ }
+
// If the target wants a .size directive for the size of the function, emit
// it.
if (MAI->hasDotTypeDotSizeDirective()) {
- // Create a symbol for the end of function, so we can get the size as
- // difference between the function label and the temp label.
- MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(FnEndLabel);
-
+ // We can get the size as difference between the function label and the
+ // temp label.
const MCExpr *SizeExp =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::Create(CurrentFnSymForSize,
OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
- // Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
- HI.Handler->endFunction(MF);
+ HI.Handler->markFunctionEnd();
}
- MMI->EndFunction();
// Print out jump tables referenced by the function.
EmitJumpTableInfo();
+ // Emit post-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->endFunction(MF);
+ }
+ MMI->EndFunction();
+
OutStreamer.AddBlankLine();
}
@@ -928,7 +957,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// To be a got equivalent, at least one of its users need to be a constant
// expression used by another global variable.
for (auto *U : GV->users())
- NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U));
+ NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
return NumGOTEquivUsers > 0;
}
@@ -961,17 +990,25 @@ void AsmPrinter::emitGlobalGOTEquivs() {
if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
return;
- while (!GlobalGOTEquivs.empty()) {
- DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I =
- GlobalGOTEquivs.begin();
- const MCSymbol *S = I->first;
- const GlobalVariable *GV = I->second.first;
- GlobalGOTEquivs.erase(S);
- EmitGlobalVariable(GV);
+ SmallVector<const GlobalVariable *, 8> FailedCandidates;
+ for (auto &I : GlobalGOTEquivs) {
+ const GlobalVariable *GV = I.second.first;
+ unsigned Cnt = I.second.second;
+ if (Cnt)
+ FailedCandidates.push_back(GV);
}
+ GlobalGOTEquivs.clear();
+
+ for (auto *GV : FailedCandidates)
+ EmitGlobalVariable(GV);
}
bool AsmPrinter::doFinalization(Module &M) {
+ // Set the MachineFunction to nullptr so that we can catch attempted
+ // accesses to MF specific features at the module level and so that
+ // we can conditionalize accesses based on whether or not it is nullptr.
+ MF = nullptr;
+
// Gather all GOT equivalent globals in the module. We really need two
// passes over the globals: one to compute and another to avoid its emission
// in EmitGlobalVariable, otherwise we would not be able to handle cases
@@ -997,59 +1034,6 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
- // Get information about jump-instruction tables to print.
- JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
-
- if (JITI && !JITI->getTables().empty()) {
- // Since we're at the module level we can't use a function specific
- // MCSubtargetInfo - instead create one with the module defaults.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
- unsigned Arch = Triple(getTargetTriple()).getArch();
- bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
- MCInst TrapInst;
- TII->getTrap(TrapInst);
- unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
-
- // Emit the right section for these functions.
- OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
- for (const auto &KV : JITI->getTables()) {
- uint64_t Count = 0;
- for (const auto &FunPair : KV.second) {
- // Emit the function labels to make this be a function entry point.
- MCSymbol *FunSym =
- OutContext.GetOrCreateSymbol(FunPair.second->getName());
- EmitAlignment(LogAlignment);
- if (IsThumb)
- OutStreamer.EmitThumbFunc(FunSym);
- if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
- OutStreamer.EmitLabel(FunSym);
-
- // Emit the jump instruction to transfer control to the original
- // function.
- MCInst JumpToFun;
- MCSymbol *TargetSymbol =
- OutContext.GetOrCreateSymbol(FunPair.first->getName());
- const MCSymbolRefExpr *TargetSymRef =
- MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
- OutContext);
- TII->getUnconditionalBranch(JumpToFun, TargetSymRef);
- OutStreamer.EmitInstruction(JumpToFun, *STI);
- ++Count;
- }
-
- // Emit enough padding instructions to fill up to the next power of two.
- uint64_t Remaining = NextPowerOf2(Count) - Count;
- for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(LogAlignment);
- OutStreamer.EmitInstruction(TrapInst, *STI);
- }
-
- }
- }
-
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
@@ -1152,11 +1136,26 @@ bool AsmPrinter::doFinalization(Module &M) {
return false;
}
+MCSymbol *AsmPrinter::getCurExceptionSym() {
+ if (!CurExceptionSym)
+ CurExceptionSym = createTempSymbol("exception");
+ return CurExceptionSym;
+}
+
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurExceptionSym = nullptr;
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ NeedsLocalForSize) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1273,10 +1272,8 @@ void AsmPrinter::EmitJumpTableInfo() {
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
*F);
- if (!JTInDiffSection) {
- OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM));
- } else {
- // Otherwise, drop it in the readonly section.
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
const MCSection *ReadOnlySection =
TLOF.getSectionForJumpTable(*F, *Mang, TM);
OutStreamer.SwitchSection(ReadOnlySection);
@@ -1585,7 +1582,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
}
// Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ MCSymbol *SetLabel = createTempSymbol("set");
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
@@ -1667,8 +1664,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(
- CE, TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -2112,9 +2108,15 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
// gotpcrelcst := <offset from @foo base> + <cst>
//
+ // If gotpcrelcst is positive it means that we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ //
int64_t GOTPCRelCst = Offset + MV.getConstant();
if (GOTPCRelCst < 0)
return;
+ if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+ return;
// Emit the GOT PC relative to replace the got equivalent global, i.e.:
//
@@ -2134,18 +2136,16 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
const GlobalVariable *GV = Result.first;
- unsigned NumUses = Result.second;
+ int NumUses = (int)Result.second;
const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
- *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
- GOTPCRelCst);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+ FinalSym, MV, Offset, AP.MMI, AP.OutStreamer);
// Update GOT equivalent usage information
--NumUses;
- if (NumUses)
+ if (NumUses >= 0)
AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
- else
- AP.GlobalGOTEquivs.erase(GOTEquivSym);
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
@@ -2206,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, DL);
+ Constant *New = ConstantFoldConstantExpression(CE, *DL);
if (New && New != CE)
return emitGlobalConstantImpl(New, AP);
}
@@ -2257,23 +2257,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
// Symbol Lowering Routines.
//===----------------------------------------------------------------------===//
-/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
-/// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- Name + Twine(ID));
-}
-
-/// GetTempSymbol - Return an assembler temporary label with the specified
-/// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Name);
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+ return OutContext.createTempSymbol(Name, true);
}
-
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
return MMI->getAddrLabelSymbol(BA->getBasicBlock());
}
@@ -2523,3 +2510,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() {}
+
+void AsmPrinterHandler::markFunctionEnd() {}
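
The .size hunk above stops minting a fresh end-of-function label and instead reuses the shared CurrentFnEnd symbol, emitting the size as the expression CurrentFnEnd - CurrentFnSymForSize. A toy model of that label-difference computation, with symbols reduced to resolved section offsets (the real code builds an MCBinaryExpr and lets the assembler resolve it):

    #include <cstdint>
    #include <iostream>

    // Toy model: a "symbol" is just a resolved offset within its section.
    struct Symbol { uint64_t Offset; };

    // size = end label minus the symbol used for sizing, mirroring the
    // MCBinaryExpr::CreateSub(CurrentFnEnd, CurrentFnSymForSize) expression.
    uint64_t functionSize(Symbol Begin, Symbol End) {
      return End.Offset - Begin.Offset;
    }

    int main() {
      Symbol FnBegin{0x400}, FnEnd{0x47c};
      std::cout << ".size foo, " << functionSize(FnBegin, FnEnd) << "\n";  // 124
      return 0;
    }
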
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d0958c1..9de36da 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
#include "ByteStreamer.h"
+#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -27,29 +30,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
- BS.EmitInt8(
- Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
- : dwarf::OperationEncodingString(Op));
-}
-
-void DebugLocDwarfExpression::EmitSigned(int Value) {
- BS.EmitSLEB128(Value, Twine(Value));
-}
-
-void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) {
- BS.EmitULEB128(Value, Twine(Value));
-}
-
-bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
- // This information is not available while emitting .debug_loc entries.
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
@@ -178,57 +163,28 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
///
/// SectionLabel is a temporary label emitted at the start of the section that
/// Label lives in.
-void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
- const MCSymbol *SectionLabel) const {
+void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
- // Get the section that we're referring to, based on SectionLabel.
- const MCSection &Section = SectionLabel->getSection();
-
- // If Label has already been emitted, verify that it is in the same section as
- // section label for sanity.
- assert((!Label->isInSection() || &Label->getSection() == &Section) &&
- "Section offset using wrong section base for label");
-
- // If the section in question will end up with an address of 0 anyway, we can
- // just emit an absolute reference to save a relocation.
- if (Section.isBaseAddressKnownZero()) {
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
OutStreamer.EmitSymbolValue(Label, 4);
return;
}
// Otherwise, emit it as a label difference from the start of the section.
- EmitLabelDifference(Label, SectionLabel, 4);
-}
-
-// Some targets do not provide a DWARF register number for every
-// register. This function attempts to emit a DWARF register by
-// emitting a piece of a super-register or by piecing together
-// multiple subregisters that alias the register.
-void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
- const MachineLocation &MLoc,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- assert(MLoc.isReg() && "MLoc must be a register");
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits);
-}
-
-void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+ EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
const MachineLocation &MLoc) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
+ DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(),
+ getDwarfDebug()->getDwarfVersion(), Streamer);
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
@@ -285,3 +241,60 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
break;
}
}
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+ // Get the abbreviation for this DIE.
+ const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+ // Emit the code (index) for the abbreviation.
+ if (isVerbose())
+ OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+ "] 0x" + Twine::utohexstr(Die.getOffset()) +
+ ":0x" + Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Abbrev.getTag()));
+ EmitULEB128(Abbrev.getNumber());
+
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+ dwarf::Form Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (isVerbose()) {
+ OutStreamer.AddComment(dwarf::AttributeString(Attr));
+ if (Attr == dwarf::DW_AT_accessibility)
+ OutStreamer.AddComment(dwarf::AccessibilityString(
+ cast<DIEInteger>(Values[i])->getValue()));
+ }
+
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev.hasChildren()) {
+ for (auto &Child : Die.getChildren())
+ emitDwarfDIE(*Child);
+
+ OutStreamer.AddComment("End Of Children Mark");
+ EmitInt8(0);
+ }
+}
+
+void
+AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
+ // For each abbreviation.
+ for (const DIEAbbrev *Abbrev : Abbrevs) {
+ // Emit the abbreviation's code (base 1 index).
+ EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(this);
+ }
+
+ // Mark end of abbreviations.
+ EmitULEB128(0, "EOM(3)");
+}
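
The new emitDwarfDIE above is the standard recursive DWARF layout: each DIE writes its abbreviation code as a ULEB128, then its attribute values in abbreviation order, then its children, and a DIE that has children terminates the list with a zero byte. A simplified, runnable model of that recursion (payload encoding elided):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Simplified DIE: abbreviation number, attribute payloads, children.
    struct DIE {
      uint64_t AbbrevNumber;
      std::vector<uint64_t> Values;
      std::vector<DIE> Children;
    };

    void emitDIE(const DIE &D, std::vector<uint64_t> &Out) {
      Out.push_back(D.AbbrevNumber);  // EmitULEB128(Abbrev.getNumber())
      for (uint64_t V : D.Values)     // Values[i]->EmitValue(this, Form)
        Out.push_back(V);
      if (!D.Children.empty()) {
        for (const DIE &C : D.Children)
          emitDIE(C, Out);
        Out.push_back(0);             // "End Of Children Mark"
      }
    }

    int main() {
      DIE CU{1, {42}, {DIE{2, {7}, {}}}};
      std::vector<uint64_t> Out;
      emitDIE(CU, Out);
      for (uint64_t B : Out)
        std::cout << B << ' ';        // prints: 1 42 2 7 0
      std::cout << '\n';
      return 0;
    }
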
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 31867dd..f1efe9d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -41,6 +41,10 @@ public:
/// call.
virtual void beginFunction(const MachineFunction *MF) = 0;
+ // \brief Emit any end-of-function marker (like .cfi_endproc). This is called
+ // before endFunction and cannot switch sections.
+ virtual void markFunctionEnd();
+
/// \brief Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index e6e7c97..bf63b1b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -73,7 +73,8 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
}
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ const MDNode *LocMDNode,
InlineAsm::AsmDialect Dialect) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
@@ -93,17 +94,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
!OutStreamer.isIntegratedAssemblerRequired()) {
emitInlineAsmStart();
OutStreamer.EmitRawText(Str);
- // If we have a machine function then grab the MCSubtarget off of that,
- // otherwise we're at the module level and want to construct one from
- // the default CPU and target triple.
- if (MF) {
- emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr);
- } else {
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
- emitInlineAsmEnd(*STI, nullptr);
- }
+ emitInlineAsmEnd(STI, nullptr);
return;
}
@@ -135,19 +126,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
- // Initialize the parser with a fresh subtarget info. It is better to use a
- // new STI here because the parser may modify it and we do not want those
- // modifications to persist after parsing the inlineasm. The modifications
- // made by the parser will be seen by the code emitters because it passes
- // the current STI down to the EncodeInstruction() method.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
-
- // Preserve a copy of the original STI because the parser may modify it. For
- // example, when switching between arm and thumb mode. If the target needs to
- // emit code to return to the original state it can do so in
+ // Create a temporary copy of the original STI because the parser may modify
+ // it. For example, when switching between arm and thumb mode. If the target
+ // needs to emit code to return to the original state it can do so in
// emitInlineAsmEnd().
- MCSubtargetInfo STIOrig = *STI;
+ MCSubtargetInfo TmpSTI = STI;
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
@@ -155,7 +138,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
- *STI, *Parser, *MII, TM.Options.MCOptions));
+ TmpSTI, *Parser, *MII, TM.Options.MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -170,7 +153,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
- emitInlineAsmEnd(STIOrig, STI.get());
+ emitInlineAsmEnd(STI, &TmpSTI);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
@@ -505,7 +488,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
else
EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
- EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), LocMD, MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use emitRawComment.
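
The inline-asm hunks change EmitInlineAsm to take the subtarget info by const reference and parse against a local copy, so mode switches made by the parser (e.g. between ARM and Thumb) cannot leak back into the caller's state. A small sketch of that copy-then-parse pattern, with SubtargetState standing in for MCSubtargetInfo:

    #include <cassert>
    #include <string>

    struct SubtargetState { std::string Mode = "arm"; };

    // The parser may flip modes while consuming the asm string.
    void runParser(SubtargetState &S) { S.Mode = "thumb"; }

    void emitInlineAsm(const SubtargetState &STI) {
      SubtargetState TmpSTI = STI;  // MCSubtargetInfo TmpSTI = STI;
      runParser(TmpSTI);
      // emitInlineAsmEnd(STI, &TmpSTI): a target can compare the original
      // against the modified copy and emit code to restore the old state.
      assert(STI.Mode == "arm" && TmpSTI.Mode == "thumb");
    }

    int main() {
      SubtargetState S;
      emitInlineAsm(S);
      return 0;
    }
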
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 42be114..179a4d4 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -19,6 +19,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>
namespace llvm {
class ByteStreamer {
@@ -66,6 +68,33 @@ class HashingByteStreamer : public ByteStreamer {
Hash.addULEB128(DWord);
}
};
+
+class BufferByteStreamer : public ByteStreamer {
+private:
+ SmallVectorImpl<char> &Buffer;
+ // FIXME: This is actually only needed for textual asm output.
+ SmallVectorImpl<std::string> &Comments;
+
+public:
+ BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+ SmallVectorImpl<std::string> &Comments)
+ : Buffer(Buffer), Comments(Comments) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ Buffer.push_back(Byte);
+ Comments.push_back(Comment.str());
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeSLEB128(DWord, OSE);
+ Comments.push_back(Comment.str());
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeULEB128(DWord, OSE);
+ Comments.push_back(Comment.str());
+ }
+};
+
}
#endif
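
BufferByteStreamer's EmitULEB128 appends the LEB128 encoding straight into its buffer via llvm::encodeULEB128. For reference, a standalone encoder producing the same byte format: seven value bits per byte, high bit set on every byte except the last:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;  // low seven bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;               // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }

    int main() {
      std::vector<uint8_t> Buf;
      encodeULEB128(624485, Buf);     // the classic DWARF spec example
      for (uint8_t B : Buf)
        std::printf("%02x ", B);      // prints: e5 8e 26
      std::printf("\n");
      return 0;
    }
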
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 64ba56b..1a706f7 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
/// Emit - Print the abbreviation using the specified asm printer.
///
-void DIEAbbrev::Emit(AsmPrinter *AP) const {
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
AP->EmitULEB128(Tag, dwarf::TagString(Tag));
@@ -204,7 +205,7 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
-void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
@@ -218,6 +219,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: Size = 2; break;
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
@@ -229,6 +231,9 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
Size = Asm->getDataLayout().getPointerSize(); break;
+ case dwarf::DW_FORM_ref_addr:
+ Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
+ break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size);
@@ -236,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
@@ -245,6 +250,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: return sizeof(int16_t);
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
@@ -255,6 +261,10 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->OutStreamer.getContext().getDwarfVersion() == 2)
+ return AP->getDataLayout().getPointerSize();
+ return sizeof(int32_t);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -272,13 +282,13 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
-void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
///
-unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -298,7 +308,7 @@ void DIEExpr::print(raw_ostream &O) const {
/// EmitValue - Emit label value.
///
-void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
@@ -307,7 +317,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of label value in bytes.
///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -326,13 +336,13 @@ void DIELabel::print(raw_ostream &O) const {
/// EmitValue - Emit delta value.
///
-void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -351,13 +361,13 @@ void DIEDelta::print(raw_ostream &O) const {
/// EmitValue - Emit string value.
///
-void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
Access->EmitValue(AP, Form);
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return Access->SizeOf(AP, Form);
}
@@ -372,32 +382,9 @@ void DIEString::print(raw_ostream &O) const {
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
-/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
-/// directive is specified by Size and Hi/Lo specify the labels.
-static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
- uint64_t Offset, const MCSymbol *Lo,
- unsigned Size) {
- MCContext &Context = Streamer.getContext();
-
- // Emit Hi+Offset - Lo
- // Get the Hi+Offset expression.
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
- MCConstantExpr::Create(Offset, Context), Context);
-
- // Get the Hi+Offset-Lo expression.
- const MCExpr *Diff = MCBinaryExpr::CreateSub(
- Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
-
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = Context.CreateTempSymbol();
- Streamer.EmitAssignment(SetLabel, Diff);
- Streamer.EmitSymbolValue(SetLabel, Size);
-}
-
/// EmitValue - Emit debug information entry offset.
///
-void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_ref_addr) {
const DwarfDebug *DD = AP->getDwarfDebug();
@@ -413,14 +400,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
DIEEntry::getRefAddrSize(AP));
else
- emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
- CU->getSectionSym(),
- DIEEntry::getRefAddrSize(AP));
+ AP->OutStreamer.EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
} else
AP->EmitInt32(Entry.getOffset());
}
-unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
+unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
// DWARF4: References that use the attribute form DW_FORM_ref_addr are
// specified to be four bytes in the DWARF 32-bit format and eight bytes
// in the DWARF 64-bit format, while DWARF Version 2 specifies that such
@@ -441,7 +426,7 @@ void DIEEntry::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
// DIETypeSignature Implementation
//===----------------------------------------------------------------------===//
-void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIETypeSignature::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
assert(Form == dwarf::DW_FORM_ref_sig8);
Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
}
@@ -460,7 +445,7 @@ void DIETypeSignature::dump() const { print(dbgs()); }
/// ComputeSize - calculate the size of the location expression.
///
-unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
+unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -472,7 +457,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit location data.
///
-void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -490,7 +475,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of location data in bytes.
///
-unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -515,7 +500,7 @@ void DIELoc::print(raw_ostream &O) const {
/// ComputeSize - calculate the size of the block.
///
-unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
+unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -527,7 +512,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit block data.
///
-void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -543,7 +528,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of block data in bytes.
///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -564,7 +549,7 @@ void DIEBlock::print(raw_ostream &O) const {
// DIELocList Implementation
//===----------------------------------------------------------------------===//
-unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -574,14 +559,14 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit label value.
///
-void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocEntries()[Index].Label;
if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
- AP->EmitSectionOffset(Label, DD->getDebugLocSym());
+ AP->emitSectionOffset(Label);
else
- AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4);
+ AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
#ifndef NDEBUG
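
The new DW_FORM_ref_addr cases in DIEInteger encode a version dependency: DWARF 2 specified such references at the target pointer size, while later versions fix them at four bytes in the 32-bit DWARF format (the same rule DIEEntry::getRefAddrSize already applies). The rule in isolation:

    #include <cassert>

    // DW_FORM_ref_addr size: pointer-sized under DWARF v2, otherwise four
    // bytes in the 32-bit DWARF format.
    unsigned refAddrSize(unsigned DwarfVersion, unsigned PointerSize) {
      if (DwarfVersion == 2)
        return PointerSize;
      return 4;  // sizeof(int32_t)
    }

    int main() {
      assert(refAddrSize(2, 8) == 8);  // DWARF v2 on a 64-bit target
      assert(refAddrSize(4, 8) == 4);  // DWARF v4: always 4 in DWARF32
      return 0;
    }
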
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1e2ba2c..da7252a 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
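
All three hunks swap a reinterpret_cast through support::ulittle64_t for support::endian::read64le, which also sidesteps any unaligned-access concerns with the cast. A standalone equivalent that assembles the eight bytes little-endian regardless of host byte order:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    uint64_t read64le(const void *P) {
      uint8_t B[8];
      std::memcpy(B, P, 8);           // safe for unaligned input
      uint64_t V = 0;
      for (int I = 7; I >= 0; --I)
        V = (V << 8) | B[I];          // byte 0 is least significant
      return V;
    }

    int main() {
      const uint8_t Tail[8] = {0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01};
      assert(read64le(Tail) == 0x0123456789abcdefULL);
      return 0;
    }
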
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 0c2a5e5..bbdf237 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6d55c03..6914bbe 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,22 +9,24 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
namespace llvm {
+class AsmPrinter;
class MDNode;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
- // Begin and end symbols for the address range that this location is valid.
+ /// Begin and end symbols for the address range that this location is valid.
const MCSymbol *Begin;
const MCSymbol *End;
public:
- /// A single location or constant.
+ /// \brief A single location or constant.
struct Value {
Value(const MDNode *Var, const MDNode *Expr, int64_t i)
: Variable(Var), Expression(Expr), EntryKind(E_Integer) {
@@ -41,20 +43,20 @@ public:
Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
: Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
assert(DIVariable(Var).Verify());
- assert(DIExpression(Expr).Verify());
+ assert(DIExpression(Expr)->isValid());
}
- // The variable to which this location entry corresponds.
+ /// The variable to which this location entry corresponds.
const MDNode *Variable;
- // Any complex address location expression for this Value.
+ /// Any complex address location expression for this Value.
const MDNode *Expression;
- // Type of entry that this represents.
+ /// Type of entry that this represents.
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
enum EntryType EntryKind;
- // Either a constant,
+ /// Either a constant,
union {
int64_t Int;
const ConstantFP *CFP;
@@ -84,6 +86,8 @@ private:
/// A nonempty list of locations/constants belonging to this entry,
/// sorted by offset.
SmallVector<Value, 1> Values;
+ SmallString<8> DWARFBytes;
+ SmallVector<std::string, 1> Comments;
public:
DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
@@ -92,9 +96,9 @@ public:
}
/// \brief If this and Next are describing different pieces of the same
- // variable, merge them by appending Next's values to the current
- // list of values.
- // Return true if the merge was successful.
+ /// variable, merge them by appending Next's values to the current
+ /// list of values.
+ /// Return true if the merge was successful.
bool MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
DIExpression Expr(Values[0].Expression);
@@ -135,7 +139,7 @@ public:
}) && "value must be a piece");
}
- // Sort the pieces by offset.
+ // \brief Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
std::sort(Values.begin(), Values.end());
@@ -146,9 +150,18 @@ public:
}),
Values.end());
}
+
+ /// \brief Lower this entry into a DWARF expression.
+ void finalize(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap);
+
+ /// \brief Return the lowered DWARF expression.
+ StringRef getDWARFBytes() const { return DWARFBytes; }
+ /// \brief Return the assembler comments for the lowered DWARF expression.
+ const SmallVectorImpl<std::string> &getComments() const { return Comments; }
};
-/// Compare two Values for equality.
+/// \brief Compare two Values for equality.
inline bool operator==(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
if (A.EntryKind != B.EntryKind)
@@ -173,7 +186,7 @@ inline bool operator==(const DebugLocEntry::Value &A,
llvm_unreachable("unhandled EntryKind");
}
-/// Compare two pieces based on their offset.
+/// \brief Compare two pieces based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
return A.getExpression().getBitPieceOffset() <
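
sortUniqueValues in this header orders the pieces by bit-piece offset and then drops duplicates with the erase(unique) idiom, keeping only the first entry per offset. The same pattern reduced to its essentials:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Value { unsigned BitPieceOffset; };
    bool operator<(Value A, Value B) { return A.BitPieceOffset < B.BitPieceOffset; }
    bool operator==(Value A, Value B) { return A.BitPieceOffset == B.BitPieceOffset; }

    int main() {
      std::vector<Value> Values = {{32}, {0}, {32}, {16}};
      std::sort(Values.begin(), Values.end());               // order by offset
      Values.erase(std::unique(Values.begin(), Values.end()),
                   Values.end());                            // drop duplicates
      assert(Values.size() == 3 && Values[2].BitPieceOffset == 32);
      return 0;
    }
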
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a71f35e..f64338e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) {
// Then compute the bucket size, minimum of 1 bucket.
if (num > 1024)
Header.bucket_count = num / 4;
- if (num > 16)
+ else if (num > 16)
Header.bucket_count = num / 2;
else
Header.bucket_count = num > 0 ? num : 1;
@@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
+ Data.reserve(Entries.size());
for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
EI != EE; ++EI) {
@@ -95,8 +96,17 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
for (size_t i = 0, e = Data.size(); i < e; ++i) {
uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
Buckets[bucket].push_back(Data[i]);
- Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ Data[i]->Sym = Asm->createTempSymbol(Prefix);
}
+
+ // Sort the contents of the buckets by hash value so that hash
+ // collisions end up together. Stable sort makes testing easier and
+ // doesn't cost much more.
+ for (size_t i = 0; i < Buckets.size(); ++i)
+ std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+ [] (HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
}
// Emits the header for the table via the AsmPrinter.
@@ -136,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
Asm->EmitInt32(index);
else
Asm->EmitInt32(UINT32_MAX);
- index += Buckets[i].size();
+ // Buckets point into the list of hashes, not into the data. Do not
+ // increment the index multiple times in case of hash collisions.
+ uint64_t PrevHash = UINT64_MAX;
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
}
}
// Walk through the buckets and emit the individual hashes for each
// bucket.
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
- Asm->EmitInt32((*HI)->HashValue);
+ Asm->EmitInt32(HashValue);
+ PrevHash = HashValue;
}
}
}
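Both `EmitBuckets` and `EmitHashes` now walk a bucket and treat a run of equal hashes as a single slot: the bucket index advances once per distinct hash, and each hash value is emitted once. A standalone sketch of that dedup walk, assuming the bucket is already sorted by hash as above (plain C++, not LLVM's API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Count the distinct hashes in a bucket sorted by hash value.
static uint32_t countDistinctHashes(const std::vector<uint32_t> &Bucket) {
  uint64_t PrevHash = UINT64_MAX; // sentinel no 32-bit hash can match
  uint32_t Count = 0;
  for (uint32_t HashValue : Bucket) {
    if (PrevHash != HashValue) // skip colliding duplicates
      ++Count;
    PrevHash = HashValue;
  }
  return Count;
}

int main() {
  // Three entries, but the two 0xdead entries collide: the index moves by 2.
  std::printf("%u\n", (unsigned)countDistinctHashes({0xbeef, 0xdead, 0xdead}));
}
```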
@@ -157,11 +180,16 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
// element in each bucket. This is done via a symbol subtraction from the
// beginning of the section. The non-section symbol will be output later
// when we emit the actual data.
-void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
MCContext &Context = Asm->OutStreamer.getContext();
const MCExpr *Sub = MCBinaryExpr::CreateSub(
@@ -175,17 +203,20 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
- MCSymbol *StrSym) {
- uint64_t PrevHash = UINT64_MAX;
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ uint64_t PrevHash = UINT64_MAX;
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ // Terminate the previous entry if there is no hash collision
+ // with the current one.
+ if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
+ Asm->emitSectionOffset((*HI)->Data.StrSym);
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
@@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
Asm->EmitInt8(HD->Flags);
}
}
- // Emit a 0 to terminate the data unless we have a hash collision.
- if (PrevHash != (*HI)->HashValue)
- Asm->EmitInt32(0);
PrevHash = (*HI)->HashValue;
}
+ // Emit the final end marker for the bucket.
+ if (!Buckets[i].empty())
+ Asm->EmitInt32(0);
}
}
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
- MCSymbol *StrSym) {
+void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
+ DwarfDebug *D) {
// Emit the header.
EmitHeader(Asm);
@@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
EmitHashes(Asm);
// Emit the offsets.
- EmitOffsets(Asm, SecBegin);
+ emitOffsets(Asm, SecBegin);
// Emit the hash data.
- EmitData(Asm, D, StrSym);
+ EmitData(Asm, D);
}
#ifndef NDEBUG
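The terminator rework in `EmitData` is subtle: a zero word now separates runs of *distinct* hashes, `PrevHash` resets per bucket, and every non-empty bucket gets one closing zero. A hedged sketch of just that placement logic (the `EmitInt32` below is a printf stand-in for the AsmPrinter call):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

static void EmitInt32(uint32_t V) { std::printf("  .long %#x\n", (unsigned)V); }

// Emit placeholder data for one bucket, sorted by hash value.
static void emitBucketData(const std::vector<uint32_t> &Bucket) {
  uint64_t PrevHash = UINT64_MAX; // reset per bucket, unlike the old code
  for (uint32_t HashValue : Bucket) {
    // Terminate the previous entry only when the hash actually changes,
    // so colliding entries share one terminator.
    if (PrevHash != UINT64_MAX && PrevHash != HashValue)
      EmitInt32(0);
    std::printf("  # data for hash %#x\n", (unsigned)HashValue);
    PrevHash = HashValue;
  }
  if (!Bucket.empty())
    EmitInt32(0); // final end marker for the bucket
}

int main() { emitBucketData({0xbeef, 0xdead, 0xdead}); }
```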
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 74963da..e6fdf08 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -222,8 +222,8 @@ private:
void EmitHeader(AsmPrinter *);
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
- void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
+ void emitOffsets(AsmPrinter *, const MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -248,7 +248,7 @@ public:
void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
- void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
+ void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index f45b24c..1bee367 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,9 +39,24 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
+ : EHStreamer(A), shouldEmitCFI(false) {}
+
+void DwarfCFIExceptionBase::markFunctionEnd() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ if (MMI->getLandingPads().empty())
+ return;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+}
+
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+ : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
+ shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -72,8 +87,6 @@ void DwarfCFIException::endModule() {
}
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -100,7 +113,8 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- if (!shouldEmitPersonality && !shouldEmitMoves)
+ shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves;
+ if (!shouldEmitCFI)
return;
Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false);
@@ -113,43 +127,18 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- MCSymbol *EHBegin =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
- if (Asm->MAI->useAssignmentForEHBegin()) {
- MCContext &Ctx = Asm->OutContext;
- MCSymbol *CurPos = Ctx.CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(CurPos);
- Asm->OutStreamer.EmitAssignment(EHBegin,
- MCSymbolRefExpr::Create(CurPos, Ctx));
- } else {
- Asm->OutStreamer.EmitLabel(EHBegin);
- }
-
// Provide LSDA information.
if (!shouldEmitLSDA)
return;
- Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()),
- LSDAEncoding);
+ Asm->OutStreamer.EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding);
}
/// endFunction - Gather and emit post-function exception information.
///
void DwarfCFIException::endFunction(const MachineFunction *) {
- if (!shouldEmitPersonality && !shouldEmitMoves)
- return;
-
- Asm->OutStreamer.EmitCFIEndProc();
-
if (!shouldEmitPersonality)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
emitExceptionTable();
}
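The net effect of this file's hunks is a template-method refactoring: the shared end-of-function work (closing the CFI frame, tidying landing pads) moves into `DwarfCFIExceptionBase::markFunctionEnd`, and subclasses only decide *whether* CFI is wanted. A rough sketch of the shape, with invented names and puts-based output standing in for the streamer:

```cpp
#include <cstdio>

struct EHStreamerBase {
  bool shouldEmitCFI = false;
  virtual ~EHStreamerBase() {}
  // Shared epilogue, run for every function.
  void markFunctionEnd() {
    if (shouldEmitCFI)
      std::puts(".cfi_endproc");
    std::puts("# tidy landing pads");
  }
};

struct CFIException : EHStreamerBase {
  void beginFunction(bool needsMoves, bool needsPersonality) {
    shouldEmitCFI = needsMoves || needsPersonality;
    if (shouldEmitCFI)
      std::puts(".cfi_startproc");
  }
};

int main() {
  CFIException EH;
  EH.beginFunction(/*needsMoves=*/true, /*needsPersonality=*/false);
  EH.markFunctionEnd();
}
```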
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index dcc5fe4..eee5fc5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -19,7 +19,7 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
: DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
- Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) {
+ Skeleton(nullptr), BaseAddress(nullptr) {
insertDIE(Node, &getUnitDie());
}
@@ -164,24 +164,17 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
addUInt(*Loc, dwarf::DW_FORM_udata,
DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- // 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- // Add the linkage name.
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(*VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ addLinkageName(*VariableDIE, GV.getLinkageName());
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
addConstantValue(*VariableDIE, CI, GTy);
@@ -243,7 +236,7 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
addSectionDelta(Die, Attribute, Label, Sec);
}
-void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
@@ -255,8 +248,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
- DwarfLineSectionSym);
+ TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
@@ -285,7 +279,7 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
- attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym());
+ attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -378,13 +372,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
// Emit offset in .debug_range as a relocatable label. emitDIE will handle
// emitting it appropriately.
- auto *RangeSectionSym = DD->getRangeSectionSym();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(
- Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()),
- std::move(Range));
+ RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
@@ -709,12 +704,14 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
}
}
-void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
+ if (!Skeleton) {
+ LabelBegin = Asm->createTempSymbol("cu_begin");
Asm->OutStreamer.EmitLabel(LabelBegin);
+ }
- DwarfUnit::emitHeader(ASectionSym);
+ DwarfUnit::emitHeader(UseOffsets);
}
/// addGlobalName - Add a new global name to the compile unit.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index c66af65..9484bb6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit {
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton;
- /// A label at the start of the non-dwo section related to this unit.
- MCSymbol *SectionSym;
-
/// The start of the unit within its section.
MCSymbol *LabelBegin;
@@ -76,7 +73,7 @@ public:
return Skeleton;
}
- void initStmtList(MCSymbol *DwarfLineSectionSym);
+ void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
@@ -168,22 +165,9 @@ public:
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
- MCSymbol *getSectionSym() const {
+ const MCSymbol *getSectionSym() const {
assert(Section);
- return SectionSym;
- }
-
- /// Pass in the SectionSym even though we could recreate it in every compile
- /// unit (type units will have actually distinct symbols once they're in
- /// comdat sections).
- void initSection(const MCSection *Section, MCSymbol *SectionSym) {
- DwarfUnit::initSection(Section);
- this->SectionSym = SectionSym;
-
- // Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
- LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+ return Section->getBeginSymbol();
}
unsigned getLength() {
@@ -191,7 +175,7 @@ public:
getHeaderSize() + UnitDie.getSize();
}
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
MCSymbol *getLabelBegin() const {
assert(Section);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index aa1f79f..e9ebd97 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -105,6 +106,25 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
+void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+ BS.EmitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+ BS.EmitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+ BS.EmitULEB128(Value, Twine(Value));
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
+
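`DebugLocDwarfExpression` routes every opcode and operand through a `ByteStreamer` rather than an `MCStreamer`, so a location expression can be lowered into a plain byte buffer (plus parallel comments) long before emission. A self-contained sketch of that idea with hand-rolled LEB128 encoders (the encodings are standard DWARF; the class layout is illustrative, not LLVM's):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct ByteStreamer {
  std::vector<uint8_t> Bytes;
  std::vector<std::string> Comments;
  void EmitInt8(uint8_t B, std::string C) {
    Bytes.push_back(B);
    Comments.push_back(std::move(C));
  }
  void EmitULEB128(uint64_t V, const std::string &C) {
    do {
      uint8_t Byte = V & 0x7f;
      V >>= 7;
      EmitInt8(V ? Byte | 0x80 : Byte, C);
    } while (V);
  }
  void EmitSLEB128(int64_t V, const std::string &C) {
    bool More;
    do {
      uint8_t Byte = V & 0x7f;
      V >>= 7; // arithmetic shift assumed for negative values
      More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
      EmitInt8(More ? Byte | 0x80 : Byte, C);
    } while (More);
  }
};

int main() {
  ByteStreamer BS;
  BS.EmitInt8(0x91, "DW_OP_fbreg"); // frame-relative location...
  BS.EmitSLEB128(-8, "-8");         // ...at offset -8, encoded as 0x78
  for (uint8_t B : BS.Bytes)
    std::printf("%02x ", B);
  std::printf("\n");
}
```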
//===----------------------------------------------------------------------===//
/// resolve - Look in the DwarfDebug map for the MDNode that
@@ -169,11 +189,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0),
- InfoHolder(A, *this, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr),
+ InfoHolder(A, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, *this, "skel_string", DIEValueAllocator),
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ IsPS4(Triple(A->getTargetTriple()).isPS4()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -182,17 +203,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
dwarf::DW_FORM_data4)),
AccelTypes(TypeAtoms) {
- DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr;
- DwarfLineSectionSym = nullptr;
- DwarfAddrSectionSym = nullptr;
- DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr;
- FunctionBeginSym = FunctionEndSym = nullptr;
CurFn = nullptr;
CurMI = nullptr;
// Turn on accelerator tables for Darwin by default, pubnames by
- // default for non-Darwin, and handle split dwarf.
+ // default for non-Darwin/PS4, and handle split dwarf.
if (DwarfAccelTables == Default)
HasDwarfAccelTables = IsDarwin;
else
@@ -204,7 +219,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
HasSplitDwarf = SplitDwarf == Enable;
if (DwarfPubSections == Default)
- HasDwarfPubSections = !IsDarwin;
+ HasDwarfPubSections = !IsDarwin && !IsPS4;
else
HasDwarfPubSections = DwarfPubSections == Enable;
@@ -212,6 +227,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
+ // Everybody else uses GNU's.
+ UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+
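That two-line policy is easy to misread, so here it is as a pure function: the standard `DW_OP_form_tls_address` only exists from DWARF 3 on, and per the comment above only Darwin and PS4 are assumed to accept it; everybody else gets the GNU extension. (Constants below are the DWARF opcode values; the helper name is invented.)

```cpp
#include <cstdint>
#include <cstdio>

enum : uint8_t {
  DW_OP_form_tls_address = 0x9b,     // DWARF 3 standard operator
  DW_OP_GNU_push_tls_address = 0xe0, // GNU extension
};

static uint8_t tlsOpcode(bool IsDarwin, bool IsPS4, unsigned DwarfVersion) {
  bool UseGNU = !(IsDarwin || IsPS4) || DwarfVersion < 3;
  return UseGNU ? DW_OP_GNU_push_tls_address : DW_OP_form_tls_address;
}

int main() {
  std::printf("%#x\n", (unsigned)tlsOpcode(true, false, 4));  // 0x9b on Darwin
  std::printf("%#x\n", (unsigned)tlsOpcode(false, false, 4)); // 0xe0 elsewhere
}
```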
Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
{
@@ -223,19 +242,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() { }
-// Switch to the specified MCSection and emit an assembler
-// temporary label to it if SymbolStem is specified.
-static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = nullptr) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem)
- return nullptr;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
@@ -264,13 +270,6 @@ static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
-// Helper for sorting sections into a stable output order.
-static bool SectionSort(const MCSection *A, const MCSection *B) {
- std::string LA = (A ? A->getLabelBeginName() : "");
- std::string LB = (B ? B->getLabelBeginName() : "");
- return LA < LB;
-}
-
// Add the various names to the Dwarf accelerator table names.
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
@@ -388,7 +387,7 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
@@ -410,11 +409,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
dwarf::DW_FORM_data1, RVer);
if (useSplitDwarf())
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
- DwarfInfoDWOSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
CUMap.insert(std::make_pair(DIUnit, &NewCU));
CUDieMap.insert(std::make_pair(&Die, &NewCU));
@@ -445,9 +442,6 @@ void DwarfDebug::beginModule() {
return;
TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
- // Emit initial sections so we can reference labels later.
- emitSectionLabels();
-
SingleCU = CU_Nodes->getNumOperands() == 1;
for (MDNode *N : CU_Nodes->operands()) {
@@ -458,8 +452,11 @@ void DwarfDebug::beginModule() {
ScopesWithImportedEntities.push_back(std::make_pair(
DIImportedEntity(ImportedEntities.getElement(i)).getContext(),
ImportedEntities.getElement(i)));
- std::sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), less_first());
+ // Stable sort to preserve the order of appearance of imported entities.
+ // This is to avoid out-of-order processing of interdependent declarations
+ // within the same scope, e.g. { namespace A = base; namespace B = A; }
+ std::stable_sort(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
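The example in the comment is the whole story: entries compare by scope only, so the two aliases in `{ namespace A = base; namespace B = A; }` compare equal, and only a *stable* sort guarantees `A` is still processed before the `B` that refers to it. A short sketch with `less_first` modeled directly:

```cpp
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  using Entry = std::pair<int, const char *>; // (scope id, imported entity)
  std::vector<Entry> Scopes = {{2, "namespace A = base;"},
                               {2, "namespace B = A;"},
                               {1, "using std::swap;"}};
  // less_first: order by scope only; equal scopes keep insertion order.
  std::stable_sort(Scopes.begin(), Scopes.end(),
                   [](const Entry &L, const Entry &R) {
                     return L.first < R.first;
                   });
  for (const auto &E : Scopes)
    std::printf("scope %d: %s\n", E.first, E.second);
}
```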
@@ -541,6 +538,8 @@ void DwarfDebug::collectDeadVariables() {
}
void DwarfDebug::finalizeModuleInfo() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
finishSubprogramDefinitions();
finishVariableDefinitions();
@@ -570,13 +569,16 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty())
+ if (!AddrPool.isEmpty()) {
+ const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- DwarfAddrSectionSym, DwarfAddrSectionSym);
- if (!SkCU->getRangeLists().empty())
+ Sym, Sym);
+ }
+ if (!SkCU->getRangeLists().empty()) {
+ const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
- DwarfDebugRangeSectionSym,
- DwarfDebugRangeSectionSym);
+ Sym, Sym);
+ }
}
// If we have code split among multiple sections or non-contiguous
@@ -613,7 +615,7 @@ void DwarfDebug::endModule() {
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
// llvm.dbg.cu metadata node)
- if (!DwarfInfoSectionSym)
+ if (!MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
@@ -621,12 +623,18 @@ void DwarfDebug::endModule() {
emitDebugStr();
- // Emit all the DIEs into a debug info section.
- emitDebugInfo();
+ if (useSplitDwarf())
+ emitDebugLocDWO();
+ else
+ // Emit info into a debug loc section.
+ emitDebugLoc();
// Corresponding abbreviations go into an abbrev section.
emitAbbreviations();
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
@@ -639,12 +647,9 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugLocDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
- } else
- // Emit info into a debug loc section.
- emitDebugLoc();
+ }
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
@@ -828,7 +833,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (End != nullptr)
EndLabel = getLabelAfterInsn(End);
else if (std::next(I) == Ranges.end())
- EndLabel = FunctionEndSym;
+ EndLabel = Asm->getFunctionEnd();
else
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
@@ -922,11 +927,13 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
DebugLocList &LocList = DotDebugLocEntries.back();
LocList.CU = &TheCU;
- LocList.Label =
- Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
+ LocList.Label = Asm->createTempSymbol("debug_loc");
// Build the location list for this variable.
buildLocationList(LocList.List, Ranges);
+ // Finalize the entry by lowering it into a DWARF bytestream.
+ for (auto &Entry : LocList.List)
+ Entry.finalize(*Asm, TypeIdentifierMap);
}
// Collect info for variables that were optimized out.
@@ -964,23 +971,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
- if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = 0;
- PrevInstLoc = DL;
- if (DL == PrologEndLoc) {
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- PrologEndLoc = DebugLoc();
- Flags |= DWARF2_FLAG_IS_STMT;
- }
- if (DL.getLine() !=
- Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
- Flags |= DWARF2_FLAG_IS_STMT;
-
+ if (DL != PrevInstLoc) {
if (!DL.isUnknown()) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ Flags |= DWARF2_FLAG_IS_STMT;
+ }
+ if (DL.getLine() !=
+ Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
- } else
+ } else if (UnknownLocations) {
+ PrevInstLoc = DL;
recordSourceLine(0, 0, nullptr, 0);
+ }
}
}
@@ -1116,11 +1125,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
else
Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
- // Emit a label for the function so that we have a beginning address.
- FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionBeginSym);
-
// Calculate history for local variables.
calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues);
@@ -1131,12 +1135,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Ranges.empty())
continue;
- // The first mention of a function argument gets the FunctionBeginSym
+ // The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
DIVariable DIVar(Ranges.front().first->getDebugVariable());
if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
+ LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
if (Ranges.front().first->getDebugExpression().isBitPiece()) {
// Mark all non-overlapping initial pieces.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
@@ -1145,7 +1149,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
[&](DbgValueHistoryMap::InstrRange Pred) {
return !piecesOverlap(Piece, Pred.first->getDebugExpression());
}))
- LabelsBeforeInsn[I->first] = FunctionBeginSym;
+ LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
break;
}
@@ -1160,7 +1164,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
PrevInstLoc = DebugLoc();
- PrevLabel = FunctionBeginSym;
+ PrevLabel = Asm->getFunctionBegin();
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
@@ -1191,11 +1195,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
return;
}
- // Define end label for subprogram.
- FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
// Set DwarfCompileUnitID in MCContext to its default value.
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
@@ -1207,7 +1206,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
collectVariableInfo(TheCU, SP, ProcessedVars);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym));
+ TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
@@ -1290,103 +1289,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-// Emit initial Dwarf sections with a label at the start of each one.
-void DwarfDebug::emitSectionLabels() {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Dwarf sections base addresses.
- DwarfInfoSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- if (useSplitDwarf()) {
- DwarfInfoDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
- DwarfTypesDWOSectionSym = emitSectionSym(
- Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
- }
- DwarfAbbrevSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- if (useSplitDwarf())
- DwarfAbbrevDWOSectionSym = emitSectionSym(
- Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo");
- if (GenerateARangeSection)
- emitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
- DwarfLineSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- if (GenerateGnuPubSections) {
- DwarfGnuPubNamesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection());
- DwarfGnuPubTypesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection());
- } else if (HasDwarfPubSections) {
- emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
- emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
- }
-
- DwarfStrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
- if (useSplitDwarf()) {
- DwarfStrDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
- DwarfAddrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc");
- } else
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc");
- DwarfDebugRangeSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range");
-}
-
-// Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE &Die) {
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die.getAbbrev();
-
- // Emit the code (index) for the abbreviation.
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
- "] 0x" + Twine::utohexstr(Die.getOffset()) +
- ":0x" + Twine::utohexstr(Die.getSize()) + " " +
- dwarf::TagString(Abbrev.getTag()));
- Asm->EmitULEB128(Abbrev.getNumber());
-
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
-
- // Emit the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i) {
- dwarf::Attribute Attr = AbbrevData[i].getAttribute();
- dwarf::Form Form = AbbrevData[i].getForm();
- assert(Form && "Too many attributes for DIE (check abbreviation)");
-
- if (Asm->isVerbose()) {
- Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
- if (Attr == dwarf::DW_AT_accessibility)
- Asm->OutStreamer.AddComment(dwarf::AccessibilityString(
- cast<DIEInteger>(Values[i])->getValue()));
- }
-
- // Emit an attribute using the defined form.
- Values[i]->EmitValue(Asm, Form);
- }
-
- // Emit the DIE children if any.
- if (Abbrev.hasChildren()) {
- for (auto &Child : Die.getChildren())
- emitDIE(*Child);
-
- Asm->OutStreamer.AddComment("End Of Children Mark");
- Asm->EmitInt8(0);
- }
-}
-
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-
- Holder.emitUnits(DwarfAbbrevSectionSym);
+ Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
@@ -1396,65 +1302,39 @@ void DwarfDebug::emitAbbreviations() {
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
-// Emit the last address of the section and the end of the line matrix.
-void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
- // Define last address of section.
- Asm->OutStreamer.AddComment("Extended Op");
- Asm->EmitInt8(0);
-
- Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
- Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
- Asm->OutStreamer.AddComment("Section end label");
-
- Asm->OutStreamer.EmitSymbolValue(
- Asm->GetTempSymbol("section_end", SectionEnd),
- Asm->getDataLayout().getPointerSize());
-
- // Mark end of matrix.
- Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
- Asm->EmitInt8(0);
- Asm->EmitInt8(1);
- Asm->EmitInt8(1);
-}
-
void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName) {
+ StringRef TableName) {
Accel.FinalizeTable(Asm, TableName);
Asm->OutStreamer.SwitchSection(Section);
- auto *SectionBegin = Asm->GetTempSymbol(SymName);
- Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym);
+ Accel.emit(Asm, Section->getBeginSymbol(), this);
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
- "Names", "names_begin");
+ "Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
- "ObjC", "objc_begin");
+ "ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
- "namespac", "namespac_begin");
+ "namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
- "types", "types_begin");
+ "types");
}
// Public name handling.
@@ -1537,15 +1417,14 @@ void DwarfDebug::emitDebugPubSection(
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
- unsigned ID = TheU->getUniqueID();
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(PSec);
// Emit the header.
Asm->OutStreamer.AddComment("Length of Public " + Name + " Info");
- MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID);
- MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID);
+ MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+ MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
Asm->OutStreamer.EmitLabel(BeginLabel);
@@ -1554,7 +1433,7 @@ void DwarfDebug::emitDebugPubSection(
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
+ Asm->emitSectionOffset(TheU->getLabelBegin());
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitInt32(TheU->getLength());
@@ -1600,62 +1479,27 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
-/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
- const DITypeIdentifierMap &Map,
- ArrayRef<DebugLocEntry::Value> Values) {
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isBitPiece();
- }) && "all values are expected to be pieces");
- assert(std::is_sorted(Values.begin(), Values.end()) &&
- "pieces are expected to be sorted");
-
- unsigned Offset = 0;
- for (auto Piece : Values) {
- DIExpression Expr = Piece.getExpression();
- unsigned PieceOffset = Expr.getBitPieceOffset();
- unsigned PieceSize = Expr.getBitPieceSize();
- assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
- if (Offset < PieceOffset) {
- // The DWARF spec seriously mandates pieces with no locations for gaps.
- Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset);
- Offset += PieceOffset-Offset;
- }
- Offset += PieceSize;
-
-#ifndef NDEBUG
- DIVariable Var = Piece.getVariable();
- unsigned VarSize = Var.getSizeInBits(Map);
- assert(PieceSize+PieceOffset <= VarSize
- && "piece is larger than or outside of variable");
- assert(PieceSize != VarSize
- && "piece covers entire variable");
-#endif
- emitDebugLocValue(Streamer, Piece, PieceOffset);
- }
-}
-
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
- const DebugLocEntry::Value Value = Entry.getValues()[0];
- if (Value.isBitPiece())
- // Emit all pieces that belong to the same variable and range.
- return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
-
- assert(Entry.getValues().size() == 1 && "only pieces may have >1 value");
- emitDebugLocValue(Streamer, Value);
+ auto Comment = Entry.getComments().begin();
+ auto End = Entry.getComments().end();
+ for (uint8_t Byte : Entry.getDWARFBytes())
+ Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
}
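After the `finalize` change, emission is trivial: the entry already holds raw DWARF bytes and a parallel comment list, so `emitDebugLocEntry` just zips the two, tolerating fewer comments than bytes. A standalone model of that loop:

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

static void emitEntry(const std::vector<uint8_t> &Bytes,
                      const std::vector<std::string> &Comments) {
  auto Comment = Comments.begin();
  auto End = Comments.end();
  for (uint8_t Byte : Bytes) {
    const char *C = Comment != End ? (Comment++)->c_str() : "";
    std::printf(".byte 0x%02x  # %s\n", Byte, C);
  }
}

int main() {
  emitEntry({0x91, 0x78}, {"DW_OP_fbreg", "-8"}); // cf. the streamer sketch above
}
```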
-void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits) {
+static void emitDebugLocValue(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap,
+ ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits) {
DIVariable DV = Value.getVariable();
- DebugLocDwarfExpression DwarfExpr(*Asm, Streamer);
-
+ DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
// Regular entry.
if (Value.isInt()) {
- DIBasicType BTy(resolve(DV.getType()));
+ DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
BTy.getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.AddSignedConstant(Value.getInt());
@@ -1666,7 +1510,7 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
DIExpression Expr = Value.getExpression();
if (!Expr || (Expr.getNumElements() == 0))
// Regular entry.
- Asm->EmitDwarfRegOp(Streamer, Loc);
+ AP.EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
if (Loc.getOffset()) {
@@ -1682,6 +1526,52 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
// FIXME: ^
}
+
+void DebugLocEntry::finalize(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap) {
+ BufferByteStreamer Streamer(DWARFBytes, Comments);
+ const DebugLocEntry::Value Value = Values[0];
+ if (Value.isBitPiece()) {
+ // Emit all pieces that belong to the same variable and range.
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isBitPiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ DIExpression Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr.getBitPieceOffset();
+ unsigned PieceSize = Expr.getBitPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
+ Expr.AddOpPiece(PieceOffset-Offset, 0);
+ Offset += PieceOffset-Offset;
+ }
+ Offset += PieceSize;
+
+#ifndef NDEBUG
+ DIVariable Var = Piece.getVariable();
+ unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap);
+ assert(PieceSize+PieceOffset <= VarSize
+ && "piece is larger than or outside of variable");
+ assert(PieceSize != VarSize
+ && "piece covers entire variable");
+#endif
+ emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset);
+ }
+ } else {
+ assert(Values.size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Value, 0);
+ }
+}
+
+
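The piece loop that moved into `DebugLocEntry::finalize` enforces the DWARF gap rule: pieces arrive sorted by bit offset, and any hole between consecutive pieces is filled with an empty `DW_OP_piece` (a piece not preceded by a location). A byte-aligned sketch of just that bookkeeping (the real code works in bits and emits through the streamer):

```cpp
#include <cstdio>
#include <vector>

struct Piece {
  unsigned OffsetInBits, SizeInBits;
};

static void emitPieces(const std::vector<Piece> &Pieces) {
  unsigned Offset = 0;
  for (const Piece &P : Pieces) {
    if (Offset < P.OffsetInBits) // gap: emit a location-less piece
      std::printf("DW_OP_piece %u  # undescribed bits\n",
                  (P.OffsetInBits - Offset) / 8);
    std::printf("<location> DW_OP_piece %u\n", P.SizeInBits / 8);
    Offset = P.OffsetInBits + P.SizeInBits;
  }
}

int main() {
  // Bits [0,32) and [96,128) are described; the 64-bit hole in between
  // becomes a bare 8-byte piece.
  emitPieces({{0, 32}, {96, 32}});
}
```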
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
Asm->OutStreamer.AddComment("Loc expr size");
MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
@@ -1752,10 +1642,7 @@ struct ArangeSpan {
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
// Provides a unique id per text section.
- DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
-
- // Prime section data.
- SectionMap[Asm->getObjFileLowering().getTextSection()];
+ MapVector<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
// Filter labels by section.
for (const SymbolCU &SCU : ArangeLabels) {
@@ -1772,31 +1659,13 @@ void DwarfDebug::emitDebugARanges() {
}
}
- // Build a list of sections used.
- std::vector<const MCSection *> Sections;
- for (const auto &it : SectionMap) {
- const MCSection *Section = it.first;
- Sections.push_back(Section);
- }
-
- // Sort the sections into order.
- // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort);
-
// Add terminating symbols for each section.
- for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
- const MCSection *Section = Sections[ID];
+ for (const auto &I : SectionMap) {
+ const MCSection *Section = I.first;
MCSymbol *Sym = nullptr;
- if (Section) {
- // We can't call MCSection::getLabelEndName, as it's only safe to do so
- // if we know the section name up-front. For user-created sections, the
- // resulting label may not be valid to use as a label. (section names can
- // use a greater set of characters on some systems)
- Sym = Asm->GetTempSymbol("debug_end", ID);
- Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Sym);
- }
+ if (Section)
+ Sym = Asm->OutStreamer.endSection(Section);
// Insert a final terminator.
SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
@@ -1804,8 +1673,9 @@ void DwarfDebug::emitDebugARanges() {
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
- for (const MCSection *Section : Sections) {
- SmallVector<SymbolCU, 8> &List = SectionMap[Section];
+ for (auto &I : SectionMap) {
+ const MCSection *Section = I.first;
+ SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 2)
continue;
@@ -1902,7 +1772,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer.AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym());
+ Asm->emitSectionOffset(CU->getLabelBegin());
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer.AddComment("Segment Size (in bytes)");
@@ -1998,10 +1868,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
@@ -2012,9 +1881,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
- // Don't pass an abbrev symbol, using a constant zero instead so as not to
- // emit relocations into the dwo file.
- InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr);
+ // Don't emit relocations into the dwo file.
+ InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2058,7 +1926,7 @@ static uint64_t makeTypeSignature(StringRef Identifier) {
// appropriately.
MD5::MD5Result Result;
Hash.final(Result);
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
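The `read64le` change is about portability, not behavior: the type signature is defined as the low-order eight bytes of the MD5 of the type identifier, read little-endian on every host, and a byte-wise read sidesteps the alignment and aliasing questions raised by the old `reinterpret_cast`. A sketch of the read (the MD5 result is faked):

```cpp
#include <cstdint>
#include <cstdio>

// Assemble a little-endian 64-bit value byte by byte, host-independently.
static uint64_t read64le(const uint8_t *P) {
  uint64_t V = 0;
  for (int i = 7; i >= 0; --i)
    V = (V << 8) | P[i]; // byte 0 is least significant
  return V;
}

int main() {
  // Stand-in for an MD5 result; the signature uses bytes 8..15.
  uint8_t Result[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                        8, 9, 10, 11, 12, 13, 14, 15};
  std::printf("%#llx\n", (unsigned long long)read64le(Result + 8));
}
```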
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1c0e163..74db3ef 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -88,7 +88,8 @@ public:
: Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U),
MInsn(nullptr), DD(DD) {
FrameIndex.push_back(FI);
- assert(Var.Verify() && E.Verify());
+ assert(Var.Verify());
+ assert(!E || E->isValid());
}
/// Construct a DbgVariable from a DEBUG_VALUE.
@@ -243,25 +244,10 @@ class DwarfDebug : public AsmPrinterHandler {
// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
- // Section Symbols: these are assembler temporary labels that are emitted at
- // the beginning of each supported dwarf section. These are used to form
- // section offsets and are created by EmitSectionLabels.
- MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
- MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
- MCSymbol *FunctionBeginSym, *FunctionEndSym;
- MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
- MCSymbol *DwarfTypesDWOSectionSym;
- MCSymbol *DwarfStrDWOSectionSym;
- MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
-
// As an optimization, there is no need to emit an entry in the directory
// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // Counter for assigning globally unique IDs for ranges.
- unsigned GlobalRangeCount;
-
// Holder for the file specific debug information.
DwarfFile InfoHolder;
@@ -290,6 +276,9 @@ class DwarfDebug : public AsmPrinterHandler {
// text.
bool UsedNonDefaultText;
+ // Whether to use the GNU TLS opcode (instead of the standard opcode).
+ bool UseGNUTLSOpcode;
+
// Version of dwarf we're emitting.
unsigned DwarfVersion;
@@ -318,6 +307,7 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
+ bool IsPS4;
AddressPool AddrPool;
@@ -347,9 +337,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// \brief Emit initial Dwarf sections with a label at the start of each one.
- void emitSectionLabels();
-
/// \brief Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -373,13 +360,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Emit the abbreviation section.
void emitAbbreviations();
- /// \brief Emit the last address of the section and the end of
- /// the line matrix.
- void emitEndOfLineMatrix(unsigned SectionEnd);
-
/// \brief Emit a specified accelerator table.
void emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName);
+ StringRef TableName);
/// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -540,8 +523,9 @@ public:
SymSize[Sym] = Size;
}
- /// \brief Recursively Emits a debug information entry.
- void emitDIE(DIE &Die);
+ /// \brief Returns whether to use DW_OP_GNU_push_tls_address instead of the
+ /// standard DW_OP_form_tls_address opcode.
+ bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
// Experimental DWARF5 features.
@@ -556,15 +540,6 @@ public:
/// Returns the Dwarf Version.
unsigned getDwarfVersion() const { return DwarfVersion; }
- /// Returns the section symbol for the .debug_loc section.
- MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
-
- /// Returns the section symbol for the .debug_str section.
- MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; }
-
- /// Returns the section symbol for the .debug_ranges section.
- MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; }
-
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
@@ -577,7 +552,8 @@ public:
/// \brief Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
- void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
+ void emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocEntry &Entry);
/// \brief emit a single value for the debug loc section.
void emitDebugLocValue(ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
@@ -621,8 +597,6 @@ public:
void addAccelType(StringRef Name, const DIE &Die, char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
- const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; }
- const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; }
iterator_range<ImportedEntityMap::const_iterator>
findImportedEntitiesForScope(const MDNode *Scope) const {
@@ -642,12 +616,6 @@ public:
/// \brief Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- // FIXME: Consider rolling ranges up into DwarfDebug since we use a single
- // range_base anyway, so there's no need to keep them as separate per-CU range
- // lists. (though one day we might end up with a range.dwo section, in which
- // case it'd go to DwarfFile)
- unsigned getNextRangeNumber() { return GlobalRangeCount++; }
-
// FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index e8867c0..6eaf707 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -21,17 +21,24 @@ namespace llvm {
class MachineFunction;
class ARMTargetStreamer;
-class DwarfCFIException : public EHStreamer {
- /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
- /// should be emitted.
+class DwarfCFIExceptionBase : public EHStreamer {
+protected:
+ DwarfCFIExceptionBase(AsmPrinter *A);
+
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI;
+
+ void markFunctionEnd() override;
+};
+
+class DwarfCFIException : public DwarfCFIExceptionBase {
+ /// Per-function flag to indicate if .cfi_personality should be emitted.
bool shouldEmitPersonality;
- /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
- /// should be emitted.
+ /// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA;
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
+ /// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
AsmPrinter::CFIMoveType moveTypeModule;
@@ -43,26 +50,21 @@ public:
DwarfCFIException(AsmPrinter *A);
virtual ~DwarfCFIException();
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-class ARMException : public EHStreamer {
+class ARMException : public DwarfCFIExceptionBase {
void emitTypeInfos(unsigned TTypeEncoding) override;
ARMTargetStreamer &getTargetStreamer();
- /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
- /// should be emitted.
- bool shouldEmitCFI;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -70,15 +72,14 @@ public:
ARMException(AsmPrinter *A);
virtual ~ARMException();
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index fcab067..489e455 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -22,14 +22,6 @@
using namespace llvm;
-const TargetRegisterInfo *DwarfExpression::getTRI() const {
- return AP.TM.getSubtargetImpl()->getRegisterInfo();
-}
-
-unsigned DwarfExpression::getDwarfVersion() const {
- return AP.getDwarfDebug()->getDwarfVersion();
-}
-
void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
@@ -74,28 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
}
bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
- int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
if (isFrameRegister(MachineReg)) {
// If variable offset is based in frame register then use fbreg.
EmitOp(dwarf::DW_OP_fbreg);
EmitSigned(Offset);
- } else {
- AddRegIndirect(DwarfReg, Offset);
+ return true;
}
+
+ int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
+ if (DwarfReg < 0)
+ return false;
+
+ AddRegIndirect(DwarfReg, Offset);
return true;
}
bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) {
- const TargetRegisterInfo *TRI = getTRI();
- if (!TRI->isPhysicalRegister(MachineReg))
+ if (!TRI.isPhysicalRegister(MachineReg))
return false;
- int Reg = TRI->getDwarfRegNum(MachineReg, false);
+ int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
@@ -107,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
- for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ Reg = TRI.getDwarfRegNum(*SR, false);
if (Reg >= 0) {
- unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
+ unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
AddReg(Reg, "super-register");
if (PieceOffsetInBits == RegOffset) {
AddOpPiece(Size, RegOffset);
@@ -136,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// efficient DW_OP_piece.
unsigned CurPos = PieceOffsetInBits;
// The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
// can avoid emitting redundant aliasing subregs.
SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned Offset = TRI.getSubRegIdxOffset(Idx);
+ Reg = TRI.getDwarfRegNum(*SR, false);
// Intersection between the bits we already emitted and the bits
// covered by this subregister.
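The surrounding hunks only swap a `TargetRegisterInfo` pointer for a reference, but the algorithm they touch is worth spelling out: a register with no DWARF number is described via the smallest enclosing super-register that has one, plus a piece operation selecting the relevant bits. A toy model under invented register tables (nothing here is LLVM's API, and `DW_OP_regx`/`DW_OP_bit_piece` stand in for the exact operators chosen):

```cpp
#include <cstdio>
#include <vector>

struct RegInfo {
  int DwarfNum;  // -1 if the register has no DWARF number
  int Super;     // enclosing register, -1 if none
  unsigned OffsetInBits, SizeInBits; // position within the super-register
};

// Toy table: reg 0 is a 32-bit low part of reg 1, like EAX inside RAX.
static const std::vector<RegInfo> Regs = {{-1, 1, 0, 32}, {0, -1, 0, 64}};

static void addMachineRegPiece(unsigned Reg) {
  if (Regs[Reg].DwarfNum >= 0) {
    std::printf("DW_OP_regx %d\n", Regs[Reg].DwarfNum);
    return;
  }
  // Walk up the super-register chain until a valid DWARF number appears.
  for (int SR = Regs[Reg].Super; SR >= 0; SR = Regs[SR].Super)
    if (Regs[SR].DwarfNum >= 0) {
      std::printf("DW_OP_regx %d  # super-register\n", Regs[SR].DwarfNum);
      std::printf("DW_OP_bit_piece %u %u\n", Regs[Reg].SizeInBits,
                  Regs[Reg].OffsetInBits);
      return;
    }
}

int main() { addMachineRegPiece(0); }
```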
@@ -180,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) {
// value, so the producers and consumers started to rely on heuristics
// to disambiguate the value vs. location status of the expression.
// See PR21176 for more details.
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
@@ -188,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) {
EmitOp(dwarf::DW_OP_constu);
EmitUnsigned(Value);
// cf. comment in DwarfExpression::AddSignedConstant().
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
@@ -204,11 +196,12 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
unsigned MachineReg,
unsigned PieceOffsetInBits) {
auto I = Expr.begin();
- // Pattern-match combinations for which more efficient representations exist
- // first.
- if (I == Expr.end())
+ auto E = Expr.end();
+ if (I == E)
return AddMachineRegPiece(MachineReg);
+ // Pattern-match combinations for which more efficient representations exist
+ // first.
bool ValidReg = false;
switch (*I) {
case dwarf::DW_OP_bit_piece: {
@@ -218,20 +211,23 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
return AddMachineRegPiece(MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
- case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_plus: {
// [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
- if (I->getNext() == dwarf::DW_OP_deref) {
+ auto N = I->getNext();
+ if ((N != E) && (*N == dwarf::DW_OP_deref)) {
unsigned Offset = I->getArg(1);
ValidReg = AddMachineRegIndirect(MachineReg, Offset);
std::advance(I, 2);
break;
} else
ValidReg = AddMachineRegPiece(MachineReg);
- case dwarf::DW_OP_deref:
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(MachineReg);
- ++I;
- break;
+ }
+ case dwarf::DW_OP_deref: {
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(MachineReg);
+ ++I;
+ break;
+ }
default:
llvm_unreachable("unsupported operand");
}
@@ -240,7 +236,7 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
return false;
// Emit remaining elements of the expression.
- AddExpression(I, Expr.end(), PieceOffsetInBits);
+ AddExpression(I, E, PieceOffsetInBits);
return true;
}
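
Editor's note: the hunks above replace DwarfExpression's back-pointer into AsmPrinter with two dependencies injected at construction; as a side effect, AddMachineRegIndirect can take its frame-register fast path before ever asking for a DWARF register number. A minimal sketch of the same injection pattern, using hypothetical stand-in types rather than the LLVM classes:

    // Stand-in for TargetRegisterInfo; only the one query used here is modeled.
    struct RegInfo {
      int getDwarfRegNum(unsigned MachineReg) const {
        return MachineReg < 32 ? static_cast<int>(MachineReg) : -1; // toy mapping
      }
    };

    class Expression {
    protected:
      const RegInfo &TRI;    // bound by reference for the object's lifetime
      unsigned DwarfVersion; // captured once; no per-use accessor indirection
    public:
      Expression(const RegInfo &TRI, unsigned DwarfVersion)
          : TRI(TRI), DwarfVersion(DwarfVersion) {}
      bool supportsStackValue() const { return DwarfVersion >= 4; }
    };
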
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index b90b7b6..985d52c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -30,21 +30,22 @@ class DIELoc;
/// entry.
class DwarfExpression {
protected:
- const AsmPrinter &AP;
// Dependencies formerly reached through AsmPrinter, now captured at construction.
- const TargetRegisterInfo *getTRI() const;
- unsigned getDwarfVersion() const;
+ const TargetRegisterInfo &TRI;
+ unsigned DwarfVersion;
public:
- DwarfExpression(const AsmPrinter &AP) : AP(AP) {}
+ DwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion)
+ : TRI(TRI), DwarfVersion(DwarfVersion) {}
virtual ~DwarfExpression() {}
/// Output a dwarf operand and an optional assembler comment.
virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int Value) = 0;
+ virtual void EmitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(unsigned Value) = 0;
+ virtual void EmitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(unsigned MachineReg) = 0;
@@ -105,27 +106,27 @@ class DebugLocDwarfExpression : public DwarfExpression {
ByteStreamer &BS;
public:
- DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
- : DwarfExpression(AP), BS(BS) {}
+ DebugLocDwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion, ByteStreamer &BS)
+ : DwarfExpression(TRI, DwarfVersion), BS(BS) {}
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression : public DwarfExpression {
+const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE)
- : DwarfExpression(AP), DU(DU), DIE(DIE) {}
-
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
}
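
Editor's note: the header now exposes one abstract emitter with two concrete backends — DebugLocDwarfExpression streams raw bytes through a ByteStreamer, while DIEDwarfExpression appends values to a DIE. To illustrate how the virtual hooks compose, here is a hypothetical third backend that merely measures the encoded size (ULEB128 sizing only; SLEB128 omitted for brevity):

    #include <cstdint>

    class SizeExpression /* : public DwarfExpression, in spirit */ {
      uint64_t Size = 0;
      static unsigned ulebSize(uint64_t V) {
        unsigned N = 0;
        do { V >>= 7; ++N; } while (V); // one byte per 7 payload bits
        return N;
      }
    public:
      void EmitOp(uint8_t, const char * = nullptr) { ++Size; } // opcodes are 1 byte
      void EmitUnsigned(uint64_t V) { Size += ulebSize(V); }
      uint64_t size() const { return Size; }
    };
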
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 3988f0d..60acc58e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -17,9 +17,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA)
- : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {}
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), StrPool(DA, *Asm, Pref) {}
DwarfFile::~DwarfFile() {}
@@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
-void DwarfFile::emitUnits(const MCSymbol *ASectionSym) {
+void DwarfFile::emitUnits(bool UseOffsets) {
for (const auto &TheU : CUs) {
DIE &Die = TheU->getUnitDie();
const MCSection *USection = TheU->getSection();
Asm->OutStreamer.SwitchSection(USection);
- TheU->emitHeader(ASectionSym);
+ TheU->emitHeader(UseOffsets);
- DD.emitDIE(Die);
+ Asm->emitDwarfDIE(Die);
}
}
@@ -120,23 +119,13 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
Die.setSize(Offset - Die.getOffset());
return Offset;
}
+
void DwarfFile::emitAbbrevs(const MCSection *Section) {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
Asm->OutStreamer.SwitchSection(Section);
-
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbreviations) {
- // Emit the abbrevations code (base 1 index.)
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(Asm);
- }
-
- // Mark end of abbreviations.
- Asm->EmitULEB128(0, "EOM(3)");
+ Asm->emitDwarfAbbrevs(Abbreviations);
}
}
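
Editor's note: the deleted loop wrote each abbreviation as its ULEB128-encoded code followed by the abbreviation data, and closed the table with a ULEB128 zero; that logic now lives behind Asm->emitDwarfAbbrevs. Since the table format leans entirely on ULEB128, a self-contained sketch of the encoding:

    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits of the remaining value
        Value >>= 7;
        if (Value)
          Byte |= 0x80;              // continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value);
    }
    // Worked example from the DWARF spec: 624485 encodes as 0xE5 0x8E 0x26.
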
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 35bf33a..c9de666 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,8 +37,6 @@ class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
- DwarfDebug &DD;
-
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -62,8 +60,7 @@ class DwarfFile {
DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
public:
- DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA);
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
~DwarfFile();
@@ -83,7 +80,7 @@ public:
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
- void emitUnits(const MCSymbol *ASectionSym);
+ void emitUnits(bool UseOffsets);
/// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index d76b66c..165ef16 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -19,7 +19,7 @@ getEntry(AsmPrinter &Asm,
std::pair<MCSymbol *, unsigned> &Entry = Pool[Str];
if (!Entry.first) {
Entry.second = Pool.size() - 1;
- Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
+ Entry.first = Asm.createTempSymbol(Prefix);
}
return Entry;
}
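
Editor's note: the pool maps each string to a (symbol, index) pair, minting both lazily on first lookup; the change merely switches to createTempSymbol, which numbers its labels itself. The lazy-insert shape, sketched with standard containers:

    #include <map>
    #include <string>
    #include <utility>

    class StringPool {
      // string -> (label name, insertion index), filled in on first use
      std::map<std::string, std::pair<std::string, unsigned>> Pool;
    public:
      const std::pair<std::string, unsigned> &getEntry(const std::string &S) {
        auto It = Pool.find(S);
        if (It == Pool.end()) {
          unsigned Index = Pool.size(); // 0-based insertion order
          It = Pool.emplace(S, std::make_pair(".Lstr" + std::to_string(Index),
                                              Index)).first;
        }
        return It->second;
      }
    };
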
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index b0c7d48..f6af73f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -17,6 +17,7 @@
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,17 +44,23 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+ DIELoc &DIE)
+ : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion()),
+ AP(AP), DU(DU), DIE(DIE) {}
+
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int Value) {
+void DIEDwarfExpression::EmitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(unsigned Value) {
+void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
- return MachineReg == getTRI()->getFrameRegister(*AP.MF);
+ return MachineReg == TRI.getFrameRegister(*AP.MF);
}
@@ -257,12 +264,14 @@ void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
/// to be in the local string pool instead of indirected.
void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
DIEValue *Value;
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
Value = new (DIEValueAllocator) DIELabel(Symb);
else
- Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
+ Value = new (DIEValueAllocator)
+ DIEDelta(Symb, TLOF.getDwarfStrSection()->getBeginSymbol());
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
}
@@ -750,6 +759,15 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
+// Add a linkage name to the DIE.
+void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
+ if (!LinkageName.empty())
+ addString(Die,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+}
+
/// addTemplateParams - Add template parameters into buffer.
void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
// Add template parameters.
@@ -1269,9 +1287,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
- if (!LinkageName.empty() && DeclLinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ if (DeclLinkageName.empty())
+ addLinkageName(SPDie, LinkageName);
if (!DeclDie)
return false;
@@ -1344,9 +1361,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
if (SP.isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
- if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) {
+ if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
- }
if (SP.isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
@@ -1597,7 +1613,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfUnit::emitHeader(bool UseOffsets) {
// Emit size of content not including length itself
Asm->OutStreamer.AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
@@ -1605,14 +1621,16 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
- if (ASectionSym)
- Asm->EmitSectionOffset(ASectionSym, ASectionSym);
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (!UseOffsets)
+ Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol());
else
- // Use a constant value when no symbol is provided.
Asm->EmitInt32(0);
+
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
@@ -1622,8 +1640,8 @@ void DwarfUnit::initSection(const MCSection *Section) {
this->Section = Section;
}
-void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
- DwarfUnit::emitHeader(ASectionSym);
+void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ DwarfUnit::emitHeader(UseOffsets);
Asm->OutStreamer.AddComment("Type Signature");
Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer.AddComment("Type DIE Offset");
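
Editor's note: switching emitHeader from a section symbol to a UseOffsets flag leaves the byte layout untouched. For orientation, the DWARF32 header emitted above, with the type-unit extension, expressed as packed structs (illustrative only; the real code streams each field):

    #include <cstdint>

    #pragma pack(push, 1)
    struct UnitHeader32 {
      uint32_t UnitLength;   // content size, excluding this field
      uint16_t Version;      // DD->getDwarfVersion()
      uint32_t AbbrevOffset; // relocated section offset, or constant 0 with UseOffsets
      uint8_t  AddressSize;  // pointer size from the DataLayout
    };
    struct TypeUnitHeader32 : UnitHeader32 {
      uint64_t TypeSignature; // hash identifying the type
      uint32_t TypeDieOffset; // offset of the type DIE within the unit
    };
    #pragma pack(pop)
    static_assert(sizeof(UnitHeader32) == 11, "4 + 2 + 4 + 1 bytes");
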
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7a5e47d..81c5821 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -120,7 +120,6 @@ protected:
DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- void initSection(const MCSection *Section);
/// Add a string attribute data and value.
void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
@@ -132,6 +131,8 @@ protected:
public:
virtual ~DwarfUnit();
+ void initSection(const MCSection *Section);
+
const MCSection *getSection() const {
assert(Section);
return Section;
@@ -251,6 +252,9 @@ public:
void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
+ /// \brief Add a linkage name, if it isn't empty.
+ void addLinkageName(DIE &Die, StringRef LinkageName);
+
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
@@ -321,7 +325,7 @@ public:
}
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(const MCSymbol *ASectionSym) const;
+ virtual void emitHeader(bool UseOffsets);
virtual DwarfCompileUnit &getCU() = 0;
@@ -423,12 +427,11 @@ public:
void setType(const DIE *Ty) { this->Ty = Ty; }
/// Emit the header for this unit, not including the initial length field.
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
- using DwarfUnit::initSection;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 4841814..14df4c9 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -436,12 +436,7 @@ void EHStreamer::emitExceptionTable() {
Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer.EmitLabel(GCCETSym);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()));
-
- if (IsSJLJ)
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(Asm->getCurExceptionSym());
// Emit the LSDA header.
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
@@ -552,16 +547,14 @@ void EHStreamer::emitExceptionTable() {
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
MCSymbol *BeginLabel = S.BeginLabel;
if (!BeginLabel)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
if (!EndLabel)
- EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-
+ EndLabel = Asm->getFunctionEnd();
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
@@ -689,19 +682,3 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Asm->EmitULEB128(TypeID);
}
}
-
-/// Emit all exception information that should come after the content.
-void EHStreamer::endModule() {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather pre-function exception information. Assumes it's being emitted
-/// immediately after the function entry point.
-void EHStreamer::beginFunction(const MachineFunction *MF) {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather and emit post-function exception information.
-void EHStreamer::endFunction(const MachineFunction *) {
- llvm_unreachable("Should be implemented");
-}
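
Editor's note: with the per-function "exception"/"eh_func_begin"/"eh_func_end" temp symbols gone, call-site ranges fall back to the function's own begin/end labels. The fallback is a two-way null-coalesce, sketched here with MCSymbol reduced to an opaque pointer:

    struct Bounds {
      const void *Begin;
      const void *End;
    };

    // SiteBegin/SiteEnd may be null; the function labels never are.
    static Bounds boundCallSite(const void *SiteBegin, const void *SiteEnd,
                                const void *FuncBegin, const void *FuncEnd) {
      return { SiteBegin ? SiteBegin : FuncBegin,
               SiteEnd   ? SiteEnd   : FuncEnd };
    }
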
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 9b316ff..94d0585 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -125,16 +125,6 @@ public:
EHStreamer(AsmPrinter *A);
virtual ~EHStreamer();
- /// Emit all exception information that should come after the content.
- void endModule() override;
-
- /// Gather pre-function exception information. Assumes being emitted
- /// immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-
// Unused.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
void beginInstruction(const MachineInstr *MI) override {}
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 2b03877..7d76ead 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -48,8 +48,6 @@ Win64Exception::~Win64Exception() {}
void Win64Exception::endModule() {
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -80,9 +78,6 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
const MCSymbol *PersHandlerSym =
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
}
/// endFunction - Gather and emit post-function exception information.
@@ -91,9 +86,6 @@ void Win64Exception::endFunction(const MachineFunction *) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
@@ -170,10 +162,8 @@ void Win64Exception::emitCSpecificHandlerTable() {
SmallVector<CallSiteEntry, 64> CallSites;
computeCallSiteTable(CallSites, LandingPads, FirstActions);
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
- MCSymbol *EHFuncEndSym =
- Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *EHFuncEndSym = Asm->getFunctionEnd();
// Emit the number of table entries.
unsigned NumEntries = 0;
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index b5e0929..d2b4eec 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -190,8 +190,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
return;
assert(FI.End && "Don't know where the function ends?");
- StringRef FuncName = getDISubprogram(GV).getDisplayName(),
- GVName = GV->getName();
+ StringRef GVName = GV->getName();
+ StringRef FuncName;
+ if (DISubprogram SP = getDISubprogram(GV))
+ FuncName = SP.getDisplayName();
+
// FIXME Clang currently sets DisplayName to "bar" for a C++
// "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
// to demangle display names anyways, so let's just put a mangled name into
@@ -364,10 +367,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
FnDebugInfo.erase(GV);
VisitedFunctions.pop_back();
} else {
- // Define end label for subprogram.
- MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
- CurFn->End = FunctionEndSym;
+ CurFn->End = Asm->getFunctionEnd();
}
CurFn = nullptr;
}
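
Editor's note: the WinCodeViewLineTables fix guards the subprogram lookup before asking for a display name, since the lookup can come back empty. The guard pattern, with hypothetical types standing in for the debug-info descriptors:

    #include <string>

    struct Subprogram {
      const char *DisplayName = nullptr;
      explicit operator bool() const { return DisplayName != nullptr; }
      std::string displayName() const { return DisplayName; } // valid only when true
    };

    static Subprogram lookupSubprogram(const void *) { return {}; } // stub

    static std::string funcNameOrEmpty(const void *GV) {
      std::string Name; // stays empty when the lookup fails
      if (Subprogram SP = lookupSubprogram(GV))
        Name = SP.displayName();
      return Name;
    }
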
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 4b64be0..fa17108 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -48,7 +48,7 @@ namespace {
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -135,9 +135,12 @@ bool AtomicExpand::runOnFunction(Function &F) {
// - into a load if it is idempotent
// - into a Cmpxchg/LL-SC loop otherwise
// we try them in that order.
- MadeChange |=
- (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
- (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
} else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
@@ -211,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
- // It is the responsibility of the target to only return true in
+ // It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
@@ -220,14 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
+ return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicRMWExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+ assert(TLI->hasLoadLinkedStoreConditional() &&
+ "TargetLowering requested we expand AtomicRMW instruction into "
+ "load-linked/store-conditional combos, but such instructions aren't "
+ "supported");
+
return expandAtomicRMWToLLSC(AI);
- else
+ }
+ case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
return expandAtomicRMWToCmpXchg(AI);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
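
Editor's note: tryExpandAtomicRMW now dispatches on the AtomicRMWExpansionKind the target returns, rather than a single boolean, so a target can ask for LL/SC, a cmpxchg loop, or no expansion at all. What the cmpxchg-loop expansion of an atomicrmw add boils down to, sketched with C++11 atomics instead of the IR the pass actually emits:

    #include <atomic>

    int fetchAddViaCmpXchg(std::atomic<int> &Addr, int Val) {
      int Old = Addr.load(std::memory_order_relaxed);
      // On failure, compare_exchange_weak refreshes Old with the value it
      // found, so each retry recomputes Old + Val against fresh data.
      while (!Addr.compare_exchange_weak(Old, Old + Val))
        ;
      return Old; // atomicrmw yields the value observed before the update
    }
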
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index b8f05cd..abe7ca1 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -727,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
+ auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
+ auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
+
+static void
+removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
+ // Remove MMOs from memory operations in the common block
+ // when they do not match the ones from the block being tail-merged.
+ // This ensures later passes conservatively compute dependencies.
+ MachineBasicBlock *MBB = MBBIStartPos->getParent();
+ // Note that CommonTailLen does not necessarily match the size of
+ // the common BB, nor cover all of its instructions, because debug
+ // instructions may differ between the blocks.
+ unsigned CommonTailLen = 0;
+ for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
+ ++CommonTailLen;
+
+ MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
+ MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
+ MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
+ MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
+
+ while (CommonTailLen--) {
+ assert(MBBI != MBBIE && "Reached BB end within common tail length!");
+ (void)MBBIE;
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+
+ while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+ ++MBBICommon;
+
+ assert(MBBICommon != MBBIECommon &&
+ "Reached BB end within common tail length!");
+ assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
+
+ if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
+ MBBICommon->clearMemRefs();
+
+ ++MBBI;
+ ++MBBICommon;
+ }
+}
+
// See if any of the blocks in MergePotentials (which all have a common single
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
@@ -761,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+ array_pod_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
@@ -840,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
<< (i == e-1 ? "" : ", "));
+ // Remove MMOs from memory operations as needed.
+ removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
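
Editor's note: removeMMOsFromMemoryOperations applies a conservative merge rule — an instruction in the surviving common tail keeps its memory-operand info only when the tail being folded away carried identical info; any mismatch clears the info so later passes must assume worst-case dependencies. The rule in miniature, over hypothetical per-instruction records:

    #include <vector>

    struct MemInfo {
      unsigned Size;
      bool Volatile;
    };

    static bool sameInfo(const MemInfo *A, const MemInfo *B) {
      return A && B && A->Size == B->Size && A->Volatile == B->Volatile;
    }

    // Common[I] and Other[I] describe the I-th matched instruction pair; null
    // means "no memory info". Assumes equal lengths, as the pass asserts.
    static void reconcile(std::vector<const MemInfo *> &Common,
                          const std::vector<const MemInfo *> &Other) {
      for (size_t I = 0, E = Common.size(); I != E; ++I)
        if (!sameInfo(Common[I], Other[I]))
          Common[I] = nullptr; // unknown => conservative dependencies
    }
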
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index f21d4d2..ef57638 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -19,7 +19,6 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
- ForwardControlFlowIntegrity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCRootLowering.cpp
@@ -29,7 +28,6 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
- JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 7c0068e..da66639 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -24,9 +24,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
- initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
+ initializeExpandPostRAPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
@@ -36,31 +37,34 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
- initializeMachineCopyPropagationPass(Registry);
- initializeMachineCombinerPass(Registry);
initializeMachineCSEPass(Registry);
+ initializeMachineCombinerPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
- initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
+ initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
- initializePEIPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeStackProtectorPass(Registry);
initializeStackColoringPass(Registry);
+ initializeStackMapLivenessPass(Registry);
+ initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
initializeTargetPassConfigPass(Registry);
@@ -70,9 +74,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
- initializeLowerIntrinsicsPass(Registry);
- initializeMachineFunctionPrinterPassPass(Registry);
- initializeStackMapLivenessPass(Registry);
+ initializeWinEHPreparePass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c0d7dca..6c9d048 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -124,7 +124,6 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -142,8 +141,7 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
+ /// ModifiedDT - Set to true if the CFG is modified in any way.
bool ModifiedDT;
/// OptSize - True if optimizing for size.
@@ -186,7 +184,7 @@ class TypePromotionTransaction;
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInst);
+ unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
@@ -214,9 +212,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
@@ -255,7 +250,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
- ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
break;
}
@@ -298,8 +292,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
- if (MadeChange)
- ModifiedDT = true;
EverMadeChange |= MadeChange;
}
@@ -313,9 +305,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
- if (ModifiedDT && DT)
- DT->recalculate(F);
-
return EverMadeChange;
}
@@ -341,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, DT);
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -481,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, DT);
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -523,13 +512,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
BB->eraseFromParent();
++NumBlocksElim;
@@ -561,12 +543,15 @@ static void computeBaseDerivedRelocateMap(
IntrinsicInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
- IntrinsicInst *Base = RelocateIdxMap[BaseKey];
- if (!Base)
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
// TODO: We might want to insert a new base object relocate and gep off
// that, if there are enough derived object relocates.
continue;
- RelocateInstMap[Base].push_back(I);
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
}
}
@@ -615,8 +600,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// Create a Builder and replace the target callsite with a gep
IRBuilder<> Builder(ToReplace);
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- Value *Replacement =
- Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV));
Instruction *ReplacementInst = cast<Instruction>(Replacement);
ReplacementInst->removeFromParent();
ReplacementInst->insertAfter(RelocatedBase);
@@ -1225,6 +1210,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ AllocaInst *AI;
+ if ((Offset2 & (PrefAlign-1)) == 0 &&
+ (AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // TODO: Also align GlobalVariables
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
@@ -1241,8 +1262,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
+ TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -1284,15 +1304,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// From here on out we're working with named functions.
if (!CI->getCalledFunction()) return false;
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
- if (!TD) return false;
-
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// to fortified library functions (e.g. __memcpy_chk) that have the default
// "don't know" as the objectsize. Anything else should be left alone.
- FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
if (Value *V = Simplifier.optimizeCall(CI)) {
CI->replaceAllUsesWith(V);
CI->eraseFromParent();
@@ -2025,7 +2041,7 @@ private:
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
@@ -2159,7 +2175,7 @@ class TypePromotionHelper {
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
@@ -2167,53 +2183,55 @@ class TypePromotionHelper {
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *
- promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *
- signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, true);
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
}
/// \see promoteOperandForOther.
- static Value *
- zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, false);
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -2330,16 +2348,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
if (isa<ZExtInst>(SExtOpnd)) {
// Replace s|zext(zext(opnd))
// => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
Value *ZExt =
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
TPT.replaceAllUsesWith(SExt, ZExt);
@@ -2350,7 +2370,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// => z|sext(opnd).
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
}
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
@@ -2359,8 +2379,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
- if (ExtInst && Exts)
- Exts->push_back(ExtInst);
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
return ExtVal;
}
@@ -2373,13 +2396,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
if (!ExtOpnd->hasOneUse()) {
// ExtOpnd will be promoted.
// All its uses, but Ext, will need to use a truncated value of the
@@ -2454,7 +2478,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
continue;
}
ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- ++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
@@ -2463,6 +2486,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Move the sign extension before the insertion point.
TPT.moveBefore(ExtForOpnd, ExtOpnd);
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
// If more sext are required, new instructions will have to be created.
ExtForOpnd = nullptr;
}
@@ -2475,22 +2499,22 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// IsPromotionProfitable - Check whether or not promoting an instruction
/// to a wider type was profitable.
-/// \p MatchedSize gives the number of instructions that have been matched
-/// in the addressing mode after the promotion was applied.
-/// \p SizeWithPromotion gives the number of created instructions for
-/// the promotion plus the number of instructions that have been
-/// matched in the addressing mode before the promotion.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been matched in the addressing
+/// mode before the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool
-AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
- unsigned SizeWithPromotion,
- Value *PromotedOperand) const {
- // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
// This is not profitable.
- if (MatchedSize < SizeWithPromotion)
+ if (NewCost > OldCost)
return false;
- if (MatchedSize > SizeWithPromotion)
+ if (NewCost < OldCost)
return true;
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
@@ -2688,9 +2712,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- unsigned CreatedInsts = 0;
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
Value *PromotedOperand =
- TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2712,7 +2737,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned OldSize = AddrModeInsts.size();
if (!MatchAddr(PromotedOperand, Depth) ||
- !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+ // The total new cost is equal to the cost of the created
+ // instructions.
+ // The total old cost is equal to the cost of the extension plus
+ // what we have saved in the addressing mode.
+ !IsPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
@@ -3472,7 +3502,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInsts = 0) {
+ unsigned CreatedInstsCost = 0) {
// Iterate over all the extensions to see if one forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
@@ -3494,10 +3524,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
- unsigned NewCreatedInsts = 0;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
// Promote.
- Value *PromotedVal =
- TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
@@ -3507,9 +3538,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
- unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
- (TotalCreatedInsts > 1 ||
+ (TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
@@ -3517,8 +3549,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
- if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
// the load, otherwise we may degrade the code quality.
@@ -4193,8 +4225,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, DT)) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -4463,8 +4495,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F) {
- if (!TM || TM->Options.EnableFastISel != true ||
- !TLI || TLI->isJumpExpensive())
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
bool MadeChange = false;
@@ -4625,10 +4656,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Request DOM Tree update.
// Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare and the existing update code is broken
- // anyways.
+ // available to CodeGenPrepare.
ModifiedDT = true;
MadeChange = true;
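
Editor's note: the reworked cost model weighs what promotion creates against what it retires — the extensions it inserts (NewCost) versus the original extension plus the address-mode instructions folded away (OldCost). Reduced to a pure function, the decision is:

    // Sketch of the comparison only; the real tie-break also inspects the
    // promoted operand, e.g. whether an ext(load) can now be merged.
    static bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                                      bool NeutralCaseLooksUseful) {
      if (NewCost > OldCost)
        return false; // created more than we folded away
      if (NewCost < OldCost)
        return true;  // strictly cheaper
      return NeutralCaseLooksUseful; // neutral: allow if it may unlock folds
    }
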
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7b47a48..42656fb 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,13 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
@@ -33,18 +39,28 @@ namespace {
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
+ DominatorTree *DT;
+ const TargetLowering *TLI;
+
bool InsertUnwindResumeCalls(Function &Fn);
Value *GetExceptionObject(ResumeInst *RI);
+ size_t
+ pruneUnreachableResumes(Function &Fn,
+ SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
public:
static char ID; // Pass identification, replacement for typeid.
// INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
// practice.
- DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {}
+ DwarfEHPrepare()
+ : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {}
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
bool runOnFunction(Function &Fn) override;
@@ -53,6 +69,8 @@ namespace {
return false;
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
const char *getPassName() const override {
return "Exception handling preparation";
}
@@ -60,13 +78,22 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
-INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions",
- false, false)
+INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
return new DwarfEHPrepare(TM);
}
+void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
/// GetExceptionObject - Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
/// instructions, including the 'resume' instruction.
@@ -107,21 +134,81 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
return ExnObj;
}
+/// Replace resumes that are not reachable from a cleanup landing pad with
+/// unreachable and then simplify those blocks.
+size_t DwarfEHPrepare::pruneUnreachableResumes(
+ Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ BitVector ResumeReachable(Resumes.size());
+ size_t ResumeIndex = 0;
+ for (auto *RI : Resumes) {
+ for (auto *LP : CleanupLPads) {
+ if (isPotentiallyReachable(LP, RI, DT)) {
+ ResumeReachable.set(ResumeIndex);
+ break;
+ }
+ }
+ ++ResumeIndex;
+ }
+
+ // If everything is reachable, there is no change.
+ if (ResumeReachable.all())
+ return Resumes.size();
+
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+ LLVMContext &Ctx = Fn.getContext();
+
+ // Otherwise, insert unreachable instructions and call simplifycfg.
+ size_t ResumesLeft = 0;
+ for (size_t I = 0, E = Resumes.size(); I < E; ++I) {
+ ResumeInst *RI = Resumes[I];
+ if (ResumeReachable[I]) {
+ Resumes[ResumesLeft++] = RI;
+ } else {
+ BasicBlock *BB = RI->getParent();
+ new UnreachableInst(Ctx, RI);
+ RI->eraseFromParent();
+ SimplifyCFG(BB, TTI, 1);
+ }
+ }
+ Resumes.resize(ResumesLeft);
+ return ResumesLeft;
+}
+
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
SmallVector<ResumeInst*, 16> Resumes;
+ SmallVector<LandingPadInst*, 16> CleanupLPads;
+ bool FoundLP = false;
for (BasicBlock &BB : Fn) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
+ if (auto *LP = BB.getLandingPadInst()) {
+ if (LP->isCleanup())
+ CleanupLPads.push_back(LP);
+ // Check the personality on the first landingpad. Don't do anything if
+ // it's for MSVC.
+ if (!FoundLP) {
+ FoundLP = true;
+ EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn());
+ if (isMSVCEHPersonality(Pers))
+ return false;
+ }
+ }
}
if (Resumes.empty())
return false;
- // Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
LLVMContext &Ctx = Fn.getContext();
+
+ size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ if (ResumesLeft == 0)
+ return true; // We pruned them all.
+
+ // Find the rewind function if we didn't already.
if (!RewindFunction) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
@@ -130,9 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
// Create the basic block where the _Unwind_Resume call will live.
- unsigned ResumesSize = Resumes.size();
-
- if (ResumesSize == 1) {
+ if (ResumesLeft == 1) {
// Instead of creating a new BB and PHI node, just append the call to
// _Unwind_Resume to the end of the single resume block.
ResumeInst *RI = Resumes.front();
@@ -149,7 +234,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
@@ -175,6 +260,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
assert(TM && "DWARF EH preparation requires a target machine");
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
+ DT = nullptr;
+ TLI = nullptr;
return Changed;
}
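
A rough standalone sketch of the mark-and-compact pattern that pruneUnreachableResumes introduces, in plain C++: record which items any source can reach, slide the survivors to the front, and trim the tail. `Reachable` is a hypothetical predicate standing in for isPotentiallyReachable, and the sketch omits the UnreachableInst/SimplifyCFG cleanup the pass performs on the pruned blocks.

    #include <cstddef>
    #include <vector>

    // Keep only the items some source can reach, preserving relative order,
    // and return how many survive -- the same shape as the pass's
    // BitVector-plus-compaction loop over Resumes.
    template <typename T, typename S, typename Pred>
    std::size_t pruneUnreachable(std::vector<T> &Items,
                                 const std::vector<S> &Sources, Pred Reachable) {
      std::vector<bool> Keep(Items.size(), false);
      for (std::size_t I = 0; I < Items.size(); ++I)
        for (const S &Src : Sources)
          if (Reachable(Src, Items[I])) {
            Keep[I] = true;
            break;
          }

      std::size_t Left = 0;
      for (std::size_t I = 0; I < Items.size(); ++I)
        if (Keep[I])
          Items[Left++] = Items[I]; // compact survivors toward the front
      Items.resize(Left);
      return Left;
    }

Returning the surviving count rather than a bool lets the caller pick the single-resume fast path, exactly as InsertUnwindResumeCalls does with ResumesLeft.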
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index b3a22c8..5b09cf1 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -113,7 +113,7 @@ struct DomainValue {
}
namespace {
-/// LiveReg - Information about a live register.
+/// Information about a live register.
struct LiveReg {
/// Value currently in this register, or NULL when no value is being tracked.
/// This counts as a DomainValue reference.
@@ -125,7 +125,7 @@ struct LiveReg {
/// will be a negative number.
int Def;
};
-} // anonynous namespace
+} // anonymous namespace
namespace {
class ExeDepsFix : public MachineFunctionPass {
@@ -174,7 +174,7 @@ public:
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndizes(unsigned Reg) const;
+ regIndices(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
@@ -205,10 +205,10 @@ private:
char ExeDepsFix::ID = 0;
-/// Translate TRI register number to a list of indizes into our stmaller tables
+/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndizes(unsigned Reg) const {
+ExeDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
@@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
return dv;
}
-/// release - Release a reference to DV. When the last reference is released,
+/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
void ExeDepsFix::release(DomainValue *DV) {
while (DV) {
@@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) {
}
}
-/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
-/// reached. Update the referenced pointer when necessary.
+/// Follow the chain of dead DomainValues until a live DomainValue is reached.
+/// Update the referenced pointer when necessary.
DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
@@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
setLiveReg(rx, alloc(domain));
}
-/// Merge - All instructions and registers in B are moved to A, and B is
-/// released.
+/// All instructions and registers in B are moved to A, and B is released.
bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
@@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
return true;
}
-// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+/// Set up LiveRegs by merging predecessor live-out values.
void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Detect back-edges from predecessors we haven't processed yet.
SeenUnknownBackEdge = false;
@@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
if (MBB->pred_empty()) {
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- for (int rx : regIndizes(*i)) {
+ for (int rx : regIndices(*i)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
@@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
- for (int rx : regIndizes(reg)) {
+ for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
@@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
break;
if (MO.isUse())
continue;
- for (int rx : regIndizes(MO.getReg())) {
+ for (int rx : regIndices(MO.getReg())) {
// This instruction explicitly defines rx.
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
@@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
force(rx, domain);
}
}
@@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
kill(rx);
force(rx, domain);
}
@@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
DomainValue *dv = LiveRegs[rx].Value;
if (dv == nullptr)
continue;
@@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
ii != ee; ++ii) {
MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
kill(rx);
setLiveReg(rx, dv);
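
The regIndizes-to-regIndices rename above is purely a spelling fix; the function's job is to map one TRI register number to the slots it occupies in the pass's smaller tables. A minimal standalone equivalent, with a vector-of-vectors standing in for AliasMap and a tiny range type standing in for llvm::iterator_range (both placeholders, not the real types):

    #include <cassert>
    #include <vector>

    // Minimal iterator range so a lookup can feed a range-based for loop,
    // the way the pass writes: for (int rx : regIndices(Reg)) ...
    template <typename It> struct Range {
      It B, E;
      It begin() const { return B; }
      It end() const { return E; }
    };

    struct AliasTable {
      std::vector<std::vector<int>> AliasMap; // reg -> indices into small tables

      Range<std::vector<int>::const_iterator> regIndices(unsigned Reg) const {
        assert(Reg < AliasMap.size() && "Invalid register");
        const auto &Entry = AliasMap[Reg];
        return {Entry.begin(), Entry.end()};
      }
    };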
diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
deleted file mode 100644
index 63c3699..0000000
--- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp
+++ /dev/null
@@ -1,374 +0,0 @@
-//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief A pass that instruments code with fast checks for indirect calls and
-/// hooks for a function to check violations.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "cfi"
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-STATISTIC(NumCFIIndirectCalls,
- "Number of indirect call sites rewritten by the CFI pass");
-
-char ForwardControlFlowIntegrity::ID = 0;
-INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTables);
-INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass() {
- return new ForwardControlFlowIntegrity();
-}
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- StringRef CFIFuncName) {
- return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing,
- CFIFuncName);
-}
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-static bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning";
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity()
- : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single),
- CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- std::string CFIFuncName)
- : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType),
- CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {}
-
-void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
- AU.addRequired<JumpInstrTables>();
-}
-
-void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) {
- // To get the indirect calls, we iterate over all functions and iterate over
- // the list of basic blocks in each. We extract a total list of indirect calls
- // before modifying any of them, since our modifications will modify the list
- // of basic blocks.
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- CallSite CS(&I);
- if (!(CS && isIndirectCall(CS)))
- continue;
-
- Value *CalledValue = CS.getCalledValue();
-
- // Don't rewrite this instruction if the indirect call is actually just
- // inline assembly, since our transformation will generate an invalid
- // module in that case.
- if (isa<InlineAsm>(CalledValue))
- continue;
-
- IndirectCalls.push_back(&I);
- }
- }
- }
-}
-
-void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M,
- CFITables &CFIT) {
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- for (Instruction *I : IndirectCalls) {
- CallSite CS(I);
- Value *CalledValue = CS.getCalledValue();
-
- // Get the function type for this call and look it up in the tables.
- Type *VTy = CalledValue->getType();
- PointerType *PTy = dyn_cast<PointerType>(VTy);
- Type *EltTy = PTy->getElementType();
- FunctionType *FunTy = dyn_cast<FunctionType>(EltTy);
- FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy);
- ++NumCFIIndirectCalls;
- Constant *JumpTableStart = nullptr;
- Constant *JumpTableMask = nullptr;
- Constant *JumpTableSize = nullptr;
-
- // Some call sites have function types that don't correspond to any
- // address-taken function in the module. This happens when function pointers
- // are passed in from external code.
- auto it = CFIT.find(TransformedTy);
- if (it == CFIT.end()) {
- // In this case, make sure that the function pointer will change by
- // setting the mask and the start to be 0 so that the transformed
- // function is 0.
- JumpTableStart = ConstantInt::get(Int64Ty, 0);
- JumpTableMask = ConstantInt::get(Int64Ty, 0);
- JumpTableSize = ConstantInt::get(Int64Ty, 0);
- } else {
- JumpTableStart = it->second.StartValue;
- JumpTableMask = it->second.MaskValue;
- JumpTableSize = it->second.Size;
- }
-
- rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask,
- JumpTableSize);
- }
-
- return;
-}
-
-bool ForwardControlFlowIntegrity::runOnModule(Module &M) {
- JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>();
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // JumpInstrTableInfo stores information about the alignment of each entry.
- // The alignment returned by JumpInstrTableInfo is alignment in bytes, not
- // in the exponent.
- ByteAlignment = JITI->entryByteAlignment();
- LogByteAlignment = llvm::Log2_64(ByteAlignment);
-
- // Set up tables for control-flow integrity based on information about the
- // jump-instruction tables.
- CFITables CFIT;
- for (const auto &KV : JITI->getTables()) {
- uint64_t Size = static_cast<uint64_t>(KV.second.size());
- uint64_t TableSize = NextPowerOf2(Size);
-
- int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;
- Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue);
- Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size);
-
- // The base of the table is defined to be the first jumptable function in
- // the table.
- Function *First = KV.second.begin()->second;
- Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy);
- CFIT[KV.first].StartValue = JumpTableStartValue;
- CFIT[KV.first].MaskValue = JumpTableMaskValue;
- CFIT[KV.first].Size = JumpTableSize;
- }
-
- if (CFIT.empty())
- return false;
-
- getIndirectCalls(M);
-
- if (!CFIEnforcing) {
- addWarningFunction(M);
- }
-
- // Update the instructions with the check and the indirect jump through our
- // table.
- updateIndirectCalls(M, CFIT);
-
- return true;
-}
-
-void ForwardControlFlowIntegrity::addWarningFunction(Module &M) {
- PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // Get the type of the Warning Function: void (i8*, i8*),
- // where the first argument is the name of the function in which the violation
- // occurs, and the second is the function pointer that violates CFI.
- SmallVector<Type *, 2> WarningFunArgs;
- WarningFunArgs.push_back(CharPtrTy);
- WarningFunArgs.push_back(CharPtrTy);
- FunctionType *WarningFunTy =
- FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false);
-
- if (!CFIFuncName.empty()) {
- Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy);
- if (!FailureFun)
- report_fatal_error("Could not get or insert the function specified by"
- " -cfi-func-name");
- } else {
- // The default warning function swallows the warning and lets the call
- // continue, since there's no generic way for it to print out this
- // information.
- Function *WarningFun = M.getFunction(cfi_failure_func_name);
- if (!WarningFun) {
- WarningFun =
- Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage,
- cfi_failure_func_name, &M);
- }
-
- BasicBlock *Entry =
- BasicBlock::Create(M.getContext(), "entry", WarningFun, 0);
- ReturnInst::Create(M.getContext(), Entry);
- }
-}
-
-void ForwardControlFlowIntegrity::rewriteFunctionPointer(
- Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart,
- Constant *JumpTableMask, Constant *JumpTableSize) {
- IRBuilder<> TempBuilder(I);
-
- Type *OrigFunType = FunPtr->getType();
-
- BasicBlock *CurBB = cast<BasicBlock>(I->getParent());
- Function *CurF = cast<Function>(CurBB->getParent());
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
-
- Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty);
- Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty);
-
- Value *NewFunPtr = nullptr;
- Value *Check = nullptr;
- switch (CFIType) {
- case CFIntegrity::Sub: {
- // This is the subtract, mask, and add version.
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Mask the difference to force this to be a table offset.
- Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask);
-
- // Add it back to the base.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- case CFIntegrity::Ror: {
- // This is the subtract and rotate version.
- // Rotate right by the alignment value. The optimizer should recognize
- // this sequence as a rotation.
-
- // This cast is safe, since unsigned is always a subset of uint64_t.
- uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment);
- Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64);
- Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64);
-
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Create the equivalent of a rotate-right instruction.
- Value *Shr = TempBuilder.CreateLShr(Sub, RightShift);
- Value *Shl = TempBuilder.CreateShl(Sub, LeftShift);
- Value *Or = TempBuilder.CreateOr(Shr, Shl);
-
- // Perform unsigned comparison to check for inclusion in the table.
- Check = TempBuilder.CreateICmpULT(Or, JumpTableSize);
- NewFunPtr = FunPtr;
- break;
- }
- case CFIntegrity::Add: {
- // This is the mask and add version.
- // Mask the function pointer to turn it into an offset into the table.
- Value *And = TempBuilder.CreateAnd(TI, JumpTableMask);
-
- // Then or this offset to the base and get the pointer value.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- }
-
- if (!CFIEnforcing) {
- // If a check hasn't been added (in the rotation version), then check to see
- // if it's the same as the original function. This check determines whether
- // or not we call the CFI failure function.
- if (!Check)
- Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr);
- BasicBlock *InvalidPtrBlock =
- BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0);
- BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I);
-
- // Remove the unconditional branch that connects the two blocks.
- TerminatorInst *TermInst = CurBB->getTerminator();
- TermInst->eraseFromParent();
-
- // Add a conditional branch that depends on the Check above.
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB);
-
- // Call the warning function for this pointer, then continue.
- Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock);
- insertWarning(M, InvalidPtrBlock, BI, FunPtr);
- } else {
- // Modify the instruction to call this value.
- CallSite CS(I);
- CS.setCalledFunction(NewFunPtr);
- }
-}
-
-void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block,
- Instruction *I, Value *FunPtr) {
- Function *ParentFun = cast<Function>(Block->getParent());
-
- // Get the function to call right before the instruction.
- Function *WarningFun = nullptr;
- if (CFIFuncName.empty()) {
- WarningFun = M.getFunction(cfi_failure_func_name);
- } else {
- WarningFun = M.getFunction(CFIFuncName);
- }
-
- assert(WarningFun && "Could not find the CFI failure function");
-
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- IRBuilder<> WarningInserter(I);
- // Create a mergeable GlobalVariable containing the name of the function.
- Value *ParentNameGV =
- WarningInserter.CreateGlobalString(ParentFun->getName());
- Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy);
- Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy);
- WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr);
-}
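
Before it was deleted, this pass derived its jump-table mask from the entry count and per-entry alignment; the arithmetic rewards a worked example. A self-contained sketch with made-up numbers (the real values came from JumpInstrTableInfo):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t ByteAlignment = 8;    // hypothetical per-entry alignment
      uint64_t LogByteAlignment = 3; // log2(ByteAlignment)
      uint64_t TableSize = 8;        // next power of two for a 5-entry table

      // Scale the power-of-two table size by the alignment, then clear the
      // low alignment bits so any masked offset lands on an entry boundary.
      int64_t Mask =
          ((TableSize << LogByteAlignment) - 1) & -(int64_t)ByteAlignment;
      // (8 << 3) - 1 = 63 (0b0111111); & -8 clears the low three bits -> 56.
      std::printf("mask = %lld\n", (long long)Mask);
      return 0;
    }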
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 7a29569..b8799a5 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -247,7 +247,7 @@ namespace {
return true;
else if (Incr1 == Incr2) {
// Favors subsumption.
- if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ if (!C1->NeedSubsumption && C2->NeedSubsumption)
return true;
else if (C1->NeedSubsumption == C2->NeedSubsumption) {
// Favors diamond over triangle, etc.
@@ -726,6 +726,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
if (BBI.IsDone || BBI.IsUnpredicable)
return false;
+ // If it is already predicated but we couldn't analyze its terminator, the
+ // latter might fall through, but we can't determine where to.
+ // Conservatively avoid if-converting again.
+ if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+ return false;
+
// If it is already predicated, check if the new predicate subsumes
// its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
@@ -1555,7 +1561,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
UpdatePredRedefs(I, Redefs);
}
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+ BBI.Predicate.append(Cond.begin(), Cond.end());
BBI.IsAnalyzed = false;
BBI.NonPredSize = 0;
@@ -1620,9 +1626,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
}
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ ToBBI.Predicate.append(Cond.begin(), Cond.end());
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.IsAnalyzed = false;
@@ -1661,8 +1666,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (NBB && !FromBBI.BB->isSuccessor(NBB))
FromBBI.BB->addSuccessor(NBB);
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
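
The three hunks above all trade std::copy into a std::back_inserter for SmallVector::append, which copies the range in one step instead of growing the container element by element. The same idiom with a standard vector, for illustration:

    #include <vector>

    int main() {
      std::vector<int> Predicate = {1, 2};
      std::vector<int> Cond = {3, 4, 5};

      // Old shape (grows one element at a time):
      //   std::copy(Cond.begin(), Cond.end(), std::back_inserter(Predicate));
      // New shape: a bulk range insert, letting the container size itself once.
      Predicate.insert(Predicate.end(), Cond.begin(), Cond.end());
      return 0;
    }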
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 187e015..fd5749b 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -21,7 +21,8 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Static member used for null interference cursors.
-InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 1791afb..6519a80 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -170,8 +170,8 @@ public:
/// Cursor - The primary query interface for the block interference cache.
class Cursor {
Entry *CacheEntry;
- BlockInterference *Current;
- static BlockInterference NoInterference;
+ const BlockInterference *Current;
+ static const BlockInterference NoInterference;
void setEntry(Entry *E) {
Current = nullptr;
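
Const-qualifying the shared NoInterference sentinel (and the Current pointer that may alias it) lets the compiler enforce that no cursor scribbles on state shared by every null cursor. A stripped-down sketch of the pattern, with placeholder types:

    struct BlockInterference {}; // empty: represents "no interference"

    class Cursor {
      const BlockInterference *Current = nullptr;
      // One immutable object shared by all null cursors.
      static const BlockInterference NoInterference;

    public:
      void setNull() { Current = &NoInterference; }
    };

    const BlockInterference Cursor::NoInterference{};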
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
deleted file mode 100644
index 75fa261..0000000
--- a/lib/CodeGen/JumpInstrTables.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief An implementation of jump-instruction tables.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jt"
-
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <vector>
-
-using namespace llvm;
-
-char JumpInstrTables::ID = 0;
-
-INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-
-STATISTIC(NumJumpTables, "Number of indirect call tables generated");
-STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
-
-ModulePass *llvm::createJumpInstrTablesPass() {
- // The default implementation uses a single table for all functions.
- return new JumpInstrTables(JumpTable::Single);
-}
-
-ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
- return new JumpInstrTables(JTT);
-}
-
-namespace {
-static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
-static const char jump_section_prefix[] = ".jump.instr.table.text.";
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-// Replaces Functions and GlobalAliases with a different Value.
-bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
- User *Us = U->getUser();
- if (!Us)
- return false;
- if (Instruction *I = dyn_cast<Instruction>(Us)) {
- CallSite CS(I);
-
- // Don't do the replacement if this use is a direct call to this function.
- // If the use is not the called value, then replace it.
- if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
- return false;
- }
-
- U->set(V);
- } else if (Constant *C = dyn_cast<Constant>(Us)) {
- // Don't replace calls to bitcasts of function symbols, since they get
- // translated to direct calls.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- // This bitcast must have exactly one user.
- if (CE->user_begin() != CE->user_end()) {
- User *ParentUs = *CE->user_begin();
- if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
- CallSite CS(CI);
- Use &CEU = *CE->use_begin();
- if (CS.isCallee(&CEU)) {
- return false;
- }
- }
- }
- }
- }
-
- // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier
- // requires alias to point to a defined function. So, GlobalAlias is handled
- // as a separate case in runOnModule.
- if (!isa<GlobalAlias>(C))
- C->replaceUsesOfWithOnConstant(GV, V, U);
- } else {
- llvm_unreachable("The Use of a Function symbol is neither an instruction "
- "nor a constant");
- }
-
- return true;
-}
-
-// Replaces all replaceable address-taken uses of GV with a pointer to a
-// jump-instruction table entry.
-void replaceValueWithFunction(GlobalValue *GV, Function *F) {
- // Go through all uses of this function and replace the uses of GV with the
- // jump-table version of the function. Get the uses as a vector before
- // replacing them, since replacing them changes the use list and invalidates
- // the iterator otherwise.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
- Use &U = *I++;
-
- // Replacement of constants replaces all instances in the constant. So, some
- // uses might have already been handled by the time we reach them here.
- if (U.get() == GV)
- replaceGlobalValueIndirectUse(GV, F, &U);
- }
-
- return;
-}
-} // end anonymous namespace
-
-JumpInstrTables::JumpInstrTables()
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
- JTType(JumpTable::Single) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::~JumpInstrTables() {}
-
-void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
-}
-
-Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
- FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(JTType, OrigFunTy);
-
- JumpMap::iterator it = Metadata.find(FunTy);
- if (Metadata.end() == it) {
- struct TableMeta Meta;
- Meta.TableNum = TableCount;
- Meta.Count = 0;
- Metadata[FunTy] = Meta;
- it = Metadata.find(FunTy);
- ++NumJumpTables;
- ++TableCount;
- }
-
- it->second.Count++;
-
- std::string NewName(jump_func_prefix);
- NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
- Function *JumpFun =
- Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
- // The section for this table
- JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
- JITI->insertEntry(FunTy, Target, JumpFun);
-
- ++NumFuncsInJumpTables;
- return JumpFun;
-}
-
-bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(JTType, FunTy);
- return Metadata.end() != Metadata.find(TransTy);
-}
-
-FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
- FunctionType *FunTy) {
- // Returning nullptr forces all types into the same table, since all types map
- // to the same type
- Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
-
- // Ignore the return type.
- Type *RetTy = VoidPtrTy;
- bool IsVarArg = FunTy->isVarArg();
- std::vector<Type *> ParamTys(FunTy->getNumParams());
- FunctionType::param_iterator PI, PE;
- int i = 0;
-
- std::vector<Type *> EmptyParams;
- Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
- FunctionType *VoidFnTy = FunctionType::get(
- Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
- switch (JTT) {
- case JumpTable::Single:
-
- return FunctionType::get(RetTy, EmptyParams, false);
- case JumpTable::Arity:
- // Transform all types to void* so that all functions with the same arity
- // end up in the same table.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- PI++, i++) {
- ParamTys[i] = VoidPtrTy;
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Simplified:
- // Project all parameters types to one of 3 types: composite, integer, and
- // function, matching the three subclasses of Type.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- ++PI, ++i) {
- assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
- isa<CompositeType>(*PI)) &&
- "This type is not an Integer or a Composite or a Function");
- if (isa<CompositeType>(*PI)) {
- ParamTys[i] = VoidPtrTy;
- } else if (isa<FunctionType>(*PI)) {
- ParamTys[i] = VoidFnTy;
- } else if (isa<IntegerType>(*PI)) {
- ParamTys[i] = Int32Ty;
- }
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Full:
- // Don't transform this type at all.
- return FunTy;
- }
-
- return nullptr;
-}
-
-bool JumpInstrTables::runOnModule(Module &M) {
- JITI = &getAnalysis<JumpInstrTableInfo>();
-
- // Get the set of jumptable-annotated functions that have their address taken.
- DenseMap<Function *, Function *> Functions;
- for (Function &F : M) {
- if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) {
- assert(F.hasUnnamedAddr() &&
- "Attribute 'jumptable' requires 'unnamed_addr'");
- Functions[&F] = nullptr;
- }
- }
-
- // Create the jump-table functions.
- for (auto &KV : Functions) {
- Function *F = KV.first;
- KV.second = insertEntry(M, F);
- }
-
- // GlobalAlias is a special case, because the target of an alias statement
- // must be a defined function. So, instead of replacing a given function in
- // the alias, we replace all uses of aliases that target jumptable functions.
- // Note that there's no need to create these functions, since only aliases
- // that target known jumptable functions are replaced, and there's no way to
- // put the jumptable annotation on a global alias.
- DenseMap<GlobalAlias *, Function *> Aliases;
- for (GlobalAlias &GA : M.aliases()) {
- Constant *Aliasee = GA.getAliasee();
- if (Function *F = dyn_cast<Function>(Aliasee)) {
- auto it = Functions.find(F);
- if (it != Functions.end()) {
- Aliases[&GA] = it->second;
- }
- }
- }
-
- // Replace each address taken function with its jump-instruction table entry.
- for (auto &KV : Functions)
- replaceValueWithFunction(KV.first, KV.second);
-
- for (auto &KV : Aliases)
- replaceValueWithFunction(KV.first, KV.second);
-
- return !Functions.empty();
-}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 9c23368..0fb0c46 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,12 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -33,12 +30,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -50,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
- *getSubtargetImpl()->getRegisterInfo(), getTargetTriple());
+ MRI = TheTarget.createMCRegInfo(getTargetTriple());
+ MII = TheTarget.createMCInstrInfo();
+ // FIXME: Having an MCSubtargetInfo on the target machine is a hack due
+ // to some backends having subtarget feature dependent module level
+ // code generation. This is similar to the hack in the AsmPrinter for
+ // module level assembly etc.
+ STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(),
+ getTargetFeatureString());
+
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -69,12 +70,13 @@ void LLVMTargetMachine::initAsmInfo() {
AsmInfo = TmpAsmInfo;
}
-LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ StringRef DataLayoutString,
+ StringRef Triple, StringRef CPU,
+ StringRef FS, TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, Triple, CPU, FS, Options) {
+ : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) {
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
}
@@ -115,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(
- *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
- TM->getObjFileLowering());
+ *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -145,16 +146,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
- // Passes to handle jumptable function annotations. These can't be handled at
- // JIT time, so we don't add them directly to addPassesToGenerateCode.
- PM.add(createJumpInstrTableInfoPass(
- getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound()));
- PM.add(createJumpInstrTablesPass(Options.JTType));
- if (Options.FCFI)
- PM.add(createForwardControlFlowIntegrityPass(
- Options.JTType, Options.CFIType, Options.CFIEnforcing,
- Options.getCFIFuncName()));
-
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -174,22 +165,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ const MCInstrInfo &MII = *getMCInstrInfo();
+
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
case CGFT_AssemblyFile: {
- MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
- MII, MRI, STI);
+ MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
+ MAI.getAssemblerDialect(), MAI, MII, MRI, STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
if (Options.MCOptions.ShowMCEncoding)
- MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context);
+ MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
@@ -203,17 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI,
- *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
- AsmStreamer.reset(
- getTarget()
- .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE,
- STI, Options.MCOptions.MCRelaxAll));
+ Triple T(getTargetTriple());
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(
+ T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
break;
}
case CGFT_Null:
@@ -253,18 +243,19 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(
- *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx);
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ MCCodeEmitter *MCE =
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
+ Triple T(getTargetTriple());
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll));
+ T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer =
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index cdf505e..4321849 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
-
-#ifdef NDEBUG
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
- LatencyPriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index dc936a3..e3791be 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -276,7 +277,7 @@ public:
/// getDebugLoc - Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
- void print(raw_ostream&, const TargetMachine*);
+ void print(raw_ostream &, const TargetRegisterInfo *);
};
} // namespace
@@ -362,7 +363,7 @@ public:
};
} // namespace
-void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
DIVariable DV(Variable);
OS << "!\"";
DV.printExtendedName(OS);
@@ -378,7 +379,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
OS << " Loc" << i << '=';
- locations[i].print(OS, TM);
+ locations[i].print(OS, TRI);
}
OS << '\n';
}
@@ -386,7 +387,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
void LDVImpl::print(raw_ostream &OS) {
OS << "********** DEBUG VARIABLES **********\n";
for (unsigned i = 0, e = userValues.size(); i != e; ++i)
- userValues[i]->print(OS, &MF->getTarget());
+ userValues[i]->print(OS, TRI);
}
void UserValue::coalesceLocation(unsigned LocNo) {
@@ -1004,7 +1005,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ DEBUG(userValues[i]->print(dbgs(), TRI));
userValues[i]->rewriteLocations(*VRM, *TRI);
userValues[i]->emitDebugValues(VRM, *LIS, *TII);
}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index fd7516d..2afd7fa 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -32,6 +32,7 @@
#include <algorithm>
using namespace llvm;
+namespace {
//===----------------------------------------------------------------------===//
// Implementation of various methods necessary for calculation of live ranges.
// The implementation of the methods abstracts from the concrete type of the
@@ -293,6 +294,7 @@ private:
return I;
}
};
+} // namespace
//===----------------------------------------------------------------------===//
// LiveRange methods
@@ -567,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- iterator I = end();
- iterator E = begin();
- do {
- --I;
- if (I->valno == ValNo)
- segments.erase(I);
- } while (I != E);
+ segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ return S.valno == ValNo;
+ }), end());
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
@@ -747,7 +745,6 @@ void LiveRange::flushSegmentSet() {
segments.empty() &&
"segment set can be used only initially before switching to the array");
segments.append(segmentSet->begin(), segmentSet->end());
- delete segmentSet;
segmentSet = nullptr;
verify();
}
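
The removeValNo hunk is the classic erase/remove_if idiom: one linear pass slides the surviving segments forward, then a single erase trims the dead tail, replacing the old reverse do/while loop and its repeated mid-vector erases. A standalone rendering with a placeholder Segment type:

    #include <algorithm>
    #include <vector>

    struct Segment {
      int valno; // which value number owns this segment
    };

    // Drop every segment owned by DeadValNo in O(n), preserving the order
    // of the segments that remain.
    void removeValNo(std::vector<Segment> &Segments, int DeadValNo) {
      Segments.erase(std::remove_if(Segments.begin(), Segments.end(),
                                    [DeadValNo](const Segment &S) {
                                      return S.valno == DeadValNo;
                                    }),
                     Segments.end());
    }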
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index cc08045..adca4cc 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -199,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI);
+ LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
computeDeadValues(LI, nullptr);
}
@@ -466,7 +466,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
MI->addRegisterDefReadUndef(LI.reg);
@@ -662,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
SRs.clear();
for (const LiveInterval::SubRange &SR : LI.subranges()) {
SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
@@ -700,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
goto CancelKill;
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
// Example:
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 7efd941..89567ef 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d804b39..45e7265 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -50,7 +50,7 @@ static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LR.createDeadDef(DefIdx, Alloc);
}
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
assert(MRI && Indexes && "call reset() first");
// Step 1: Create minimal live segments for every definition of Reg.
@@ -63,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
continue;
unsigned SubReg = MO.getSubReg();
- if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
: MRI->getMaxLaneMaskForVReg(Reg);
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 90bf971..34d9953 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -187,7 +187,7 @@ public:
/// Calculates liveness for the register specified in live interval @p LI.
/// Creates subregister live ranges as needed if subreg liveness tracking is
/// enabled.
- void calculate(LiveInterval &LI);
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
//===--------------------------------------------------------------------===//
// Low-level interface.
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 8a6ac25..5c9c679 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
  assert(Slot >= 0 && "Spill slot index must be >= 0");
SS2IntervalMap::iterator I = S2IMap.find(Slot);
if (I == S2IMap.end()) {
- I = S2IMap.insert(I, std::make_pair(Slot,
- LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(
+ TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ .first;
S2RCMap.insert(std::make_pair(Slot, RC));
} else {
// Use the largest common subclass register class.
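
The getOrCreateInterval hunk switches from insert(make_pair(...)) to emplace with std::piecewise_construct, so the mapped LiveInterval is constructed in place inside the map node rather than copied out of a temporary pair. A minimal sketch of the same call shape with placeholder types:

    #include <map>
    #include <tuple>
    #include <utility>

    struct Interval {
      unsigned Reg;
      float Weight;
      Interval(unsigned R, float W) : Reg(R), Weight(W) {}
    };

    int main() {
      std::map<int, Interval> S2IMap;
      int Slot = 3;

      // Key and mapped value are each built from their own argument tuple;
      // no temporary std::pair<int, Interval> is materialized first.
      auto It = S2IMap.emplace(std::piecewise_construct,
                               std::forward_as_tuple(Slot),
                               std::forward_as_tuple(/*Reg=*/42u, /*W=*/0.0F))
                    .first;
      (void)It;
      return 0;
    }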
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index c4bca5f..11deb81 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index e8bf687..8378429 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
static inline bool
-lookupCandidateBaseReg(int64_t BaseOffset,
+lookupCandidateBaseReg(unsigned BaseReg,
+ int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
const MachineInstr *MI,
@@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset,
// Check if the relative offset from the where the base register references
// to the target address is in range for the instruction.
int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
- return TRI->isFrameOffsetLegal(MI, Offset);
+ return TRI->isFrameOffsetLegal(MI, BaseReg, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// instruction itself will be taken into account by the target,
// so we don't have to adjust for it here when reusing a base
// register.
- if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
- LocalOffset, MI, TRI)) {
+ if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset,
+ FrameSizeAdjust, LocalOffset, MI,
+ TRI)) {
DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
// We found a register to reuse.
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
@@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// then don't bother creating it.
if (ref + 1 >= e ||
!lookupCandidateBaseReg(
- BaseOffset, FrameSizeAdjust,
+ BaseReg, BaseOffset, FrameSizeAdjust,
FrameReferenceInsns[ref + 1].getLocalOffset(),
FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
BaseOffset = PrevBaseOffset;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 3c73905..98359b1 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\t';
if (I->isInsideBundle())
OS << " * ";
- I->print(OS, &getParent()->getTarget());
+ I->print(OS);
}
// Print the successors of this block according to the CFG.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 1b5c1f1..ecc50c9 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -40,13 +41,14 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "block-placement2"
+#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of unconditional branches");
@@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
-static cl::opt<unsigned>
-ExitBlockBias("block-placement-exit-block-bias",
- cl::desc("Block frequency percentage a loop exit block needs "
- "over the original exit to be considered the new exit."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> OutlineOptionalThreshold(
+ "outline-optional-threshold",
+ cl::desc("Don't outline optional branches that are a single block with an "
+ "instruction count below this threshold"),
+ cl::init(4), cl::Hidden);
namespace {
class BlockChain;
@@ -107,7 +121,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -144,19 +158,18 @@ public:
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
- BI != BE; ++BI) {
- Blocks.push_back(*BI);
- assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
- BlockToChain[*BI] = this;
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ BlockToChain[ChainBB] = this;
}
}
#ifndef NDEBUG
/// \brief Dump the blocks in this chain.
LLVM_DUMP_METHOD void dump() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- (*I)->dump();
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
}
#endif // NDEBUG
@@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the target's lowering info.
const TargetLoweringBase *TLI;
+ /// \brief A handle to the post dominator tree.
+ MachineDominatorTree *MDT;
+
+  /// \brief A set of blocks that are unavoidably executed, i.e. they dominate
+ /// all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
- void markChainSuccessors(BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *getFirstUnplacedBlock(
- MachineFunction &F,
- const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ selectBestCandidateBlock(BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
@@ -244,6 +262,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -252,12 +271,13 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
@@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber()
- << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS << "BB#" << BB->getNumber();
+ OS << " (derived from LLVM BB '" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
- for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
- CBI != CBE; ++CBI) {
+ for (MachineBasicBlock *MBB : Chain) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
// shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
- SE = (*CBI)->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
continue;
// This is a cross-chain edge that is within the loop, so decrement the
@@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors(
/// very hot successor edges.
///
/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
- MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
@@ -363,6 +380,30 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+    // If we outline optional branches, check whether Succ is unavoidable,
+    // i.e. whether it dominates all terminators of the MachineFunction. If it
+    // does, other successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+ UnavoidableBlocks.count(Succ) > 0) {
+ auto HasShortOptionalBranch = [&]() {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
+ return true;
+ }
+ return false;
+ };
+ if (!HasShortOptionalBranch())
+ return Succ;
+ }
+
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
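
A hedged restatement of the predecessor scan above as a free function; it drops the BlockFilter and chain-membership checks for brevity, so it sketches the heuristic rather than reproducing the pass code:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // True if the unavoidable successor Succ has a small side block, reached
    // only from BB, that should stay inline for fallthrough.
    static bool hasShortOptionalBranch(MachineBasicBlock *BB,
                                       MachineBasicBlock *Succ,
                                       unsigned Threshold) {
      for (MachineBasicBlock *Pred : Succ->predecessors()) {
        if (Pred == Succ)
          continue;
        // An optional branch is a single block whose only predecessor is BB.
        if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
          continue;
        // Only short blocks are cheap enough to keep inline.
        if (Pred->size() < Threshold)
          return true;
      }
      return false;
    }
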
@@ -426,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// some code complexity) into the loop below.
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
[&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
- for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
- WBE = WorkList.end();
- WBI != WBE; ++WBI) {
- BlockChain &SuccChain = *BlockToChain[*WBI];
+ for (MachineBasicBlock *MBB : WorkList) {
+ BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*WBI)
- << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
continue;
}
assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
- BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
continue;
- BestBlock = *WBI;
+ BestBlock = MBB;
BestFreq = CandidateFreq;
}
return BestBlock;
@@ -481,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB,
- BlockChain &Chain,
+ MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
@@ -509,8 +546,8 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
- BlockFilter);
+ BestSucc =
+ getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -523,8 +560,8 @@ void MachineBlockPlacement::buildChain(
// Zero out LoopPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
- << " to " << getBlockNum(BestSucc) << "\n");
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
+ << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
@@ -554,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
if (!LoopBlockSet.count(*HeaderChain.begin()))
return L.getHeader();
- DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
+ << "\n");
BlockFrequency BestPredFreq;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
- PE = L.getHeader()->pred_end();
- PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
+ for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
<< Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
@@ -594,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
return BestPred;
}
-
/// \brief Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -624,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
- DEBUG(dbgs() << "Finding best loop exit for: "
- << getBlockName(L.getHeader()) << "\n");
- for (MachineLoop::block_iterator I = L.block_begin(),
- E = L.block_end();
- I != E; ++I) {
- BlockChain &Chain = *BlockToChain[*I];
+ DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
+ << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
// mid-way through an inner loop or a successor of an analyzable branch.
- if (*I != *std::prev(Chain.end()))
+ if (MBB != *std::prev(Chain.end()))
continue;
// Now walk the successors. We need to establish whether this has a viable
@@ -646,43 +676,40 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// the MBPI analysis, we use the internal weights and manually compute the
// probabilities to avoid quadratic behavior.
uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
- for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
- SE = (*I)->succ_end();
- SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isLandingPad())
continue;
- if (*SI == *I)
+ if (Succ == MBB)
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (chain conflict)\n");
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
- if (LoopBlockSet.count(*SI)) {
- DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccWeight << ")\n");
HasLoopingSucc = true;
continue;
}
unsigned SuccLoopDepth = 0;
- if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
SuccLoopDepth = ExitLoop->getLoopDepth();
if (ExitLoop->contains(&L))
- BlocksExitingToOuterLoop.insert(*I);
+ BlocksExitingToOuterLoop.insert(MBB);
}
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
- BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " [L:" << SuccLoopDepth
- << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
@@ -690,10 +717,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
BranchProbability Bias(100 - ExitBlockBias, 100);
if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
- ((*I)->isLayoutSuccessor(*SI) &&
+ (MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;
- ExitingBB = *I;
+ ExitingBB = MBB;
}
}
@@ -734,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
MachineBasicBlock *Top = *LoopChain.begin();
bool ViableTopFallthrough = false;
- for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
- PE = Top->pred_end();
- PI != PE; ++PI) {
- BlockChain *PredChain = BlockToChain[*PI];
- if (!LoopBlockSet.count(*PI) &&
- (!PredChain || *PI == *std::prev(PredChain->end()))) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
ViableTopFallthrough = true;
break;
}
@@ -750,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
- for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
- SE = Bottom->succ_end();
- SI != SE; ++SI) {
- BlockChain *SuccChain = BlockToChain[*SI];
- if (!LoopBlockSet.count(*SI) &&
- (!SuccChain || *SI == *SuccChain->begin()))
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
return;
}
}
- BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
- ExitingBB);
+ BlockChain::iterator ExitIt =
+ std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -778,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *InnerLoop : L)
+ buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
@@ -805,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineLoop::block_iterator BI = L.block_begin(),
- BE = L.block_end();
- BI != BE; ++BI) {
- BlockChain &Chain = *BlockToChain[*BI];
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
continue;
++Chain.LoopPredecessors;
}
@@ -841,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
}
- for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
- BCI != BCE; ++BCI) {
- dbgs() << " ... " << getBlockName(*BCI) << "\n";
- if (!LoopBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.erase(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
dbgs() << "Loop chain contains a block not contained by the loop!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
}
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
- LBE = LoopBlockSet.end();
- LBI != LBE; ++LBI)
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*LBI) << "\n";
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
@@ -875,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = FI;
- BlockChain *Chain
- = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
for (;;) {
@@ -899,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+
// Build any loop-based chains.
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
- ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(F, *L);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = &*FI;
- BlockChain &Chain = *BlockToChain[BB];
+ for (MachineBasicBlock &MBB : F) {
+ BlockChain &Chain = *BlockToChain[&MBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain)
continue;
++Chain.LoopPredecessors;
}
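
The same computation restated as a standalone helper; a sketch that also guards the degenerate case of a function with no exit block:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // Fold every exit block through findNearestCommonDominator, then collect
    // all blocks dominating the fold point: those run on every path to exit.
    static void computeUnavoidableBlocks(MachineFunction &F,
                                         MachineDominatorTree &MDT,
                                         SmallPtrSetImpl<MachineBasicBlock *> &Out) {
      MachineBasicBlock *Terminator = nullptr;
      for (MachineBasicBlock &MBB : F)
        if (MBB.succ_size() == 0)
          Terminator =
              Terminator ? MDT.findNearestCommonDominator(Terminator, &MBB) : &MBB;
      Out.clear();
      if (!Terminator)
        return; // e.g. an infinite loop: no block is an exit.
      for (MachineBasicBlock &MBB : F)
        if (MDT.dominates(&MBB, Terminator))
          Out.insert(&MBB);
    }
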
@@ -940,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- FunctionBlockSet.insert(FI);
+ for (MachineBasicBlock &MBB : F)
+ FunctionBlockSet.insert(&MBB);
- for (BlockChain::iterator BCI = FunctionChain.begin(),
- BCE = FunctionChain.end();
- BCI != BCE; ++BCI)
- if (!FunctionBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
BadFunc = true;
dbgs() << "Function chain contains a block not in the function!\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
if (!FunctionBlockSet.empty()) {
BadFunc = true;
- for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
- FBE = FunctionBlockSet.end();
- FBI != FBE; ++FBI)
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
dbgs() << "Function contains blocks never placed into a chain!\n"
- << " Bad block: " << getBlockName(*FBI) << "\n";
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
}
assert(!BadFunc && "Detected problems with the block placement.");
});
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
- for (BlockChain::iterator BI = FunctionChain.begin(),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
- DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
- : " ... ")
- << getBlockName(*BI) << "\n");
- if (InsertPos != MachineFunction::iterator(*BI))
- F.splice(InsertPos, *BI);
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F.splice(InsertPos, ChainBB);
else
++InsertPos;
// Update the terminator of the previous block.
- if (BI == FunctionChain.begin())
+ if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
+ MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -989,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// The "PrevBB" is not yet updated to reflect current code layout, so,
      // o. it may fall-through to a block without explicit "goto" instruction
-      // before layout, and no longer fall-through it after layout; or 
+      // before layout, and no longer fall-through it after layout; or
      // o. just the opposite.
- //
+ //
// AnalyzeBranch() may return erroneous value for FBB when these two
// situations take place. For the first scenario FBB is mistakenly set
// NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
      // is mistakenly pointing to "ChainBB".
//
bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
PrevBB->updateTerminator();
needUpdateBr = false;
Cond.clear();
@@ -1018,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
<< getBlockName(PrevBB) << "\n");
DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
<< " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
needUpdateBr = true;
@@ -1042,29 +1067,30 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
- return; // Empty chain.
+ return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
- for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
// Don't align non-looping basic blocks. These are unlikely to execute
// enough times to matter in practice. Note that we'll still handle
// unnatural CFGs inside of a natural outer loop (the common case) and
// rotated loops.
- MachineLoop *L = MLI->getLoopFor(*BI);
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
if (!L)
continue;
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
- continue; // Don't care about loop alignment.
+ continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
// aligning it.
- BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
if (Freq < WeightedEntryFreq)
continue;
@@ -1077,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
- MachineBasicBlock *LayoutPred = *std::prev(BI);
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
- if (!LayoutPred->isSuccessor(*BI)) {
- (*BI)->setAlignment(Align);
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
continue;
}
@@ -1090,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb))
- (*BI)->setAlignment(Align);
+ ChainBB->setAlignment(Align);
}
}
@@ -1110,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
buildCFGChains(F);
@@ -1119,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- FI->setAlignment(AlignAllBlock);
+ for (MachineBasicBlock &MBB : F)
+ MBB.setAlignment(AlignAllBlock);
// We always return true as we have no way to track whether the final order
// differs from the original order.
@@ -1176,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
- Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
- : NumUncondBranches;
- Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
- : UncondBranchTakenFreq;
- for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
- SE = I->succ_end();
- SI != SE; ++SI) {
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
// Skip if this successor is a fallthrough.
- if (I->isLayoutSuccessor(*SI))
+ if (MBB.isLayoutSuccessor(Succ))
continue;
- BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
++NumBranches;
BranchTakenFreq += EdgeFreq.getFrequency();
}
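
The statistic kept above sums, over every non-fallthrough edge, the source block's frequency scaled by the branch probability. A minimal sketch of one edge's contribution under the same model, using the analysis getters as they appear in the pass:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
    #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
    #include <cstdint>
    using namespace llvm;

    // Frequency contribution of one taken (non-fallthrough) branch edge.
    static uint64_t takenBranchFreq(MachineBasicBlock &MBB, MachineBasicBlock *Succ,
                                    MachineBlockFrequencyInfo &MBFI,
                                    MachineBranchProbabilityInfo &MBPI) {
      if (MBB.isLayoutSuccessor(Succ))
        return 0; // A fallthrough executes no branch.
      BlockFrequency EdgeFreq =
          MBFI.getBlockFreq(&MBB) * MBPI.getEdgeProbability(&MBB, Succ);
      return EdgeFreq.getFrequency();
    }
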
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 21b9c5a..f72d72a 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index cbd6272..9611122 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
I != E; ++I) {
unsigned MappedDef = *I;
// Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
- AvailCopyMap.erase(*SR);
- }
+ AvailCopyMap.erase(MappedDef);
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
}
}
}
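
The guard is dropped because erase is idempotent: a sub-register entry can linger even when the super-register key is already gone, so clearing everything unconditionally is both simpler and strictly more thorough. A sketch of the resulting invalidation step, assuming the DenseMap type the pass uses for AvailCopyMap:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Drop Reg and every sub-register of Reg from the available-copy map.
    static void invalidateCopies(unsigned Reg,
                                 DenseMap<unsigned, MachineInstr *> &AvailCopyMap,
                                 const TargetRegisterInfo *TRI) {
      AvailCopyMap.erase(Reg);
      for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
        AvailCopyMap.erase(*SR);
    }
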
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
using namespace llvm;
@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
DT->print(OS);
}
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+ // Bail out early if there is nothing to do.
+ if (CriticalEdgesToSplit.empty())
+ return;
+
+  // For each element in CriticalEdgesToSplit, remember whether or not the
+  // element is the new immediate dominator of its successor. The mapping is
+  // done by index, i.e., the information for the ith element of
+  // CriticalEdgesToSplit is the ith element of IsNewIDom.
+ SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+ size_t Idx = 0;
+
+ // Collect all the dominance properties info, before invalidating
+ // the underlying DT.
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // Update dominator information.
+ MachineBasicBlock *Succ = Edge.ToBB;
+ MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+ for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+ if (PredBB == Edge.NewBB)
+ continue;
+ // If we are in this situation:
+ // FromBB1 FromBB2
+ // + +
+ // + + + +
+ // + + + +
+ // ... Split1 Split2 ...
+ // + +
+ // + +
+ // +
+ // Succ
+      // Instead of checking the dominance property with Split2, we check it
+      // with FromBB2, since Split2 is still unknown to the underlying DT
+      // structure.
+ if (NewBBs.count(PredBB)) {
+ assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+ "critical edge split has more "
+ "than one predecessor!");
+ PredBB = *PredBB->pred_begin();
+ }
+ if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ IsNewIDom[Idx] = false;
+ break;
+ }
+ }
+ ++Idx;
+ }
+
+ // Now, update DT with the collected dominance properties info.
+ Idx = 0;
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // We know FromBB dominates NewBB.
+ MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx])
+ DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ ++Idx;
+ }
+ NewBBs.clear();
+ CriticalEdgesToSplit.clear();
+}
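
Callers interact with this through a record/apply pair: the pass doing the CFG surgery records each split, and the tree folds them all in lazily. A hedged usage sketch; recordSplitCriticalEdge is the companion API on the MachineDominators.h side of this change, which is outside this diffstat:

    #include "llvm/CodeGen/MachineDominators.h"
    using namespace llvm;

    // After inserting NewBB on the critical edge FromBB -> ToBB, defer the
    // dominator-tree repair instead of recomputing eagerly.
    static void notifySplit(MachineBasicBlock *FromBB, MachineBasicBlock *ToBB,
                            MachineBasicBlock *NewBB, MachineDominatorTree &MDT) {
      MDT.recordSplitCriticalEdge(FromBB, ToBB, NewBB);
      // Nothing changes yet: applySplitCriticalEdges() runs before the next
      // dominance query touches the tree.
    }
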
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 151a260..6ceace8 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
if (STI->getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(this);
@@ -584,14 +584,6 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
return -++NumFixedObjects;
}
-int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) {
- // Force the use of a frame pointer. The intention is that this intrinsic be
- // used in conjunction with unwind mechanisms that leak the frame pointer.
- setFrameAddressIsTaken(true);
- Size = RoundUpToAlignment(Size, StackAlignment);
- return CreateStackObject(Size, StackAlignment, false);
-}
-
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
assert(MBB && "MBB must be valid");
@@ -903,16 +895,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
else if (A->getType() != IntTy)
A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
if (isa<PointerType>(B->getType()))
B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
return A == B;
}
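
The constructor change above is the visible edge of per-function subtargets: the subtarget is now resolved from the IR function, so function attributes such as "target-features" can select a different one per MachineFunction. A minimal sketch using the signature this patch switches to:

    #include "llvm/IR/Function.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetSubtargetInfo.h"
    using namespace llvm;

    // One subtarget per function, resolved at MachineFunction construction.
    static const TargetSubtargetInfo *subtargetFor(const Function &F,
                                                   const TargetMachine &TM) {
      return TM.getSubtargetImpl(F); // Was the no-argument getSubtargetImpl().
    }
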
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 981e4a3..1240efb 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -276,17 +276,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
/// print - Print the specified machine operand.
///
-void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (!TM)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
- const TargetRegisterInfo *TRI =
- TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
-
+void MachineOperand::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
switch (getType()) {
case MachineOperand::MO_Register:
OS << PrintReg(getReg(), TRI, getSubReg());
@@ -1512,23 +1503,19 @@ void MachineInstr::dump() const {
#endif
}
-static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
- raw_ostream &CommentOS) {
- const LLVMContext &Ctx = MF->getFunction()->getContext();
- DL.print(Ctx, CommentOS);
-}
-
-void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
- bool SkipOpers) const {
- // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+ // We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
- if (!TM && MF)
- TM = &MF->getTarget();
- if (MF)
+ if (MF) {
MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
}
// Save a list of virtual registers.
@@ -1541,7 +1528,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
!getOperand(StartOp).isImplicit();
++StartOp) {
if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, TM);
+ getOperand(StartOp).print(OS, TRI);
unsigned Reg = getOperand(StartOp).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
@@ -1551,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " = ";
// Print the opcode name.
- if (TM && TM->getSubtargetImpl()->getInstrInfo())
- OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode());
+ if (TII)
+ OS << TII->getName(getOpcode());
else
OS << "UNKNOWN";
@@ -1568,7 +1555,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
// Print asm string.
OS << " ";
- getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI);
// Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1606,9 +1593,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (MRI->use_empty(Reg)) {
bool HasAliasLive = false;
- for (MCRegAliasIterator AI(
- Reg, TM->getSubtargetImpl()->getRegisterInfo(), true);
- AI.isValid(); ++AI) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
if (!MRI->use_empty(AliasReg)) {
HasAliasLive = true;
@@ -1641,10 +1626,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (DI.isVariable() && !DIV.getName().empty())
OS << "!\"" << DIV.getName() << '\"';
else
- MO.print(OS, TM);
- } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
- OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName(
- MO.getImm());
+ MO.print(OS, TRI);
+ } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TRI->getSubRegIndexName(MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1661,11 +1645,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
unsigned RCID = 0;
if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
- if (TM) {
- const TargetRegisterInfo *TRI =
- TM->getSubtargetImpl()->getRegisterInfo();
- OS << ':'
- << TRI->getRegClassName(TRI->getRegClass(RCID));
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
@@ -1679,7 +1660,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
// Compute the index of the next operand descriptor.
AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
} else
- MO.print(OS, TM);
+ MO.print(OS, TRI);
}
// Briefly indicate whether any call clobbers were omitted.
@@ -1715,7 +1696,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+ OS << " " << TRI->getRegClassName(RC)
<< ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
@@ -1738,7 +1719,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
if (!InlinedAtDL.isUnknown() && MF) {
OS << " inlined @[ ";
- printDebugLoc(InlinedAtDL, MF, OS);
+ InlinedAtDL.print(OS);
OS << " ]";
}
}
@@ -1747,7 +1728,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
} else if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
- printDebugLoc(debugLoc, MF, OS);
+ debugLoc.print(OS);
}
OS << '\n';
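
With printDebugLoc gone, location printing no longer threads an LLVMContext through. A sketch of the simplified call as used above, assuming the instruction carries a known location:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Print an instruction's source location, if it has a known one.
    static void printLocation(const MachineInstr &MI, raw_ostream &OS) {
      DebugLoc DL = MI.getDebugLoc();
      if (!DL.isUnknown())
        DL.print(OS); // Previously DL.print(Ctx, OS) with the function's context.
    }
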
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 64d0932..2f65a2e 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
cl::desc("MachineLICM should hoist even cheap instructions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+ cl::desc("MachineLICM should sink instructions into "
+ "loops to avoid register spills"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -243,6 +249,11 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+ /// SinkIntoLoop - Sink instructions into loops if profitable. This
+ /// especially tries to prevent register spills caused by register pressure
+ /// if there is little to no overhead moving instructions into loops.
+ void SinkIntoLoop();
+
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
/// reference.
@@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
+
+ if (SinkInstsToAvoidSpills)
+ SinkIntoLoop();
}
}
@@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
+void MachineLICM::SinkIntoLoop() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineInstr *, 8> Candidates;
+ for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+ I != Preheader->instr_end(); ++I) {
+ // We need to ensure that we can safely move this instruction into the loop.
+    // As such, it must not have side effects, as a call does.
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+ Candidates.push_back(I);
+ }
+
+ for (MachineInstr *I : Candidates) {
+ const MachineOperand &MO = I->getOperand(0);
+ if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+ continue;
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+ bool CanSink = true;
+ MachineBasicBlock *B = nullptr;
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ // FIXME: Come up with a proper cost model that estimates whether sinking
+ // the instruction (and thus possibly executing it on every loop
+ // iteration) is more expensive than a register.
+      // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ CanSink = false;
+ break;
+ }
+ if (!B) {
+ B = MI.getParent();
+ continue;
+ }
+ B = DT->findNearestCommonDominator(B, MI.getParent());
+ if (!B) {
+ CanSink = false;
+ break;
+ }
+ }
+ if (!CanSink || !B || B == Preheader)
+ continue;
+ B->splice(B->getFirstNonPHI(), Preheader, I);
+ }
+}
+
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
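
The sink target chosen above is the nearest common dominator of all users. A hedged restatement of just that placement decision; the real code additionally requires every user to be a copy, per the FIXME about a proper cost model:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineDominators.h"
    using namespace llvm;

    // Returns the block to sink into, or null if no profitable block exists.
    static MachineBasicBlock *chooseSinkBlock(ArrayRef<MachineBasicBlock *> UseBlocks,
                                              MachineBasicBlock *Preheader,
                                              MachineDominatorTree &DT) {
      MachineBasicBlock *B = nullptr;
      for (MachineBasicBlock *UseBB : UseBlocks) {
        B = B ? DT.findNearestCommonDominator(B, UseBB) : UseBB;
        if (!B)
          return nullptr; // Users share no common dominator.
      }
      // Sinking to the preheader itself would be a no-op.
      return (B && B != Preheader) ? B : nullptr;
    }
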
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 89054d4..ce6abdd 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 32b7db1..278a8f2 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -65,7 +65,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
- getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
+ getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF);
// Stop early if there is no room to grow.
if (NewRC == OldRC)
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 89ac6a8..7a3c80b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -209,6 +209,11 @@ static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
useDefaultMachineSched);
+static cl::opt<bool> EnableMachineSched(
+ "enable-misched",
+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+ cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (EnableMachineSched.getNumOccurrences()) {
+ if (!EnableMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enableMachineScheduler())
+ return false;
+
DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
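
This replaces the boolOrDefault plumbing deleted from Passes.cpp further down: getNumOccurrences() distinguishes "flag not given" from "flag explicitly false", which a plain bool cannot. A sketch of the tri-state pattern in isolation, with a hypothetical -enable-feature flag standing in for -enable-misched:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool> EnableFeature(
        "enable-feature", cl::desc("Force the feature on or off."),
        cl::init(true), cl::Hidden);

    // An explicit -enable-feature / -enable-feature=false wins; otherwise the
    // target's default decides.
    static bool featureEnabled(bool TargetDefault) {
      if (EnableFeature.getNumOccurrences())
        return EnableFeature;
      return TargetDefault;
    }
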
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index bdb094f..991241e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -397,7 +397,7 @@ void MachineVerifier::report(const char *msg,
assert(MO);
report(msg, MO->getParent());
errs() << "- operand " << MONum << ": ";
- MO->print(errs(), TM);
+ MO->print(errs(), TRI);
errs() << "\n";
}
@@ -739,7 +739,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
if (!isUInt<5>(MI->getOperand(1).getImm()))
report("Unknown asm flags", &MI->getOperand(1), 1);
- assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+ static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
unsigned OpNo = InlineAsm::MIOp_FirstOperand;
unsigned NumOps;
@@ -927,7 +927,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
- TRI->getLargestLegalSuperClass(RC);
+ TRI->getLargestLegalSuperClass(RC, *MF);
if (!SuperRC) {
report("No largest legal super class exists.", MO, MONum);
return;
@@ -1573,7 +1573,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
- if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) {
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ !hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
errs() << S << " in " << LR << '\n';
@@ -1649,40 +1650,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- verifyLiveRange(LI, LI.reg);
-
unsigned Reg = LI.reg;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- unsigned Mask = 0;
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0)
- report("Lane masks of sub ranges overlap in live interval", MF, LI);
- if ((SR.LaneMask & ~MaxMask) != 0)
- report("Subrange lanemask is invalid", MF, LI);
- Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg, SR.LaneMask);
- if (!LI.covers(SR))
- report("A Subrange is not covered by the main range", MF, LI);
- }
- } else if (LI.hasSubRanges()) {
- report("subregister liveness only allowed for virtual registers", MF, LI);
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ verifyLiveRange(LI, Reg);
+
+ unsigned Mask = 0;
+ unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask) != 0)
+ report("Lane masks of sub ranges overlap in live interval", MF, LI);
+ if ((SR.LaneMask & ~MaxMask) != 0)
+ report("Subrange lanemask is invalid", MF, LI);
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ if (!LI.covers(SR))
+ report("A Subrange is not covered by the main range", MF, LI);
}
// Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF, LI);
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- errs() << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- errs() << ' ' << (*I)->id;
- errs() << '\n';
- }
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ errs() << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ errs() << ' ' << (*I)->id;
+ errs() << '\n';
}
}
}
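
The subrange checks above enforce two lane-mask invariants: pairwise disjointness, and containment in the register's maximum mask. Distilled into a sketch:

    #include "llvm/ADT/ArrayRef.h"

    // True if the subrange lane masks are pairwise disjoint and each stays
    // within MaxMask, the lanes the virtual register can actually cover.
    static bool laneMasksValid(llvm::ArrayRef<unsigned> Masks, unsigned MaxMask) {
      unsigned Seen = 0;
      for (unsigned M : Masks) {
        if ((Seen & M) != 0)
          return false; // Overlapping sub ranges.
        if ((M & ~MaxMask) != 0)
          return false; // Lanes outside the register.
        Seen |= M;
      }
      return true;
    }
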
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index def2e3d..d514190 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
@@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
cl::Hidden, cl::desc("Split all critical edges during "
"PHI elimination"));
+static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
+ "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
+ cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
+
namespace {
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
@@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// there is a risk it may not be coalesced away.
//
// If the copy would be a kill, there is no need to split the edge.
- if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB);
+ if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
continue;
-
- DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
- << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
- << ": " << *BBI);
+ if (ShouldSplit) {
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+ }
// If Reg is not live-in to MBB, it means it must be live-in to some
// other PreMBB successor, and we can avoid the interference by splitting
@@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// is likely to be left after coalescing. If we are looking at a loop
// exiting edge, split it so we won't insert code in the loop, otherwise
// don't bother.
- bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+ ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB);
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
@@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Split unless this edge is entering CurLoop from an outer loop.
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
}
- if (!ShouldSplit)
+ if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
DEBUG(dbgs() << "Failed to split critical edge.\n");
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 272d068..c128414 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -23,8 +23,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
@@ -55,9 +54,6 @@ static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
static cl::opt<cl::boolOrDefault>
OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
cl::desc("Enable optimized register allocation compilation path."));
-static cl::opt<cl::boolOrDefault>
-EnableMachineSched("enable-misched",
- cl::desc("Enable the machine instruction scheduling pass."));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -116,28 +112,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
return PassID;
}
-/// Allow Pass selection to be overriden by command line options. This supports
-/// flags with ternary conditions. TargetID is passed through by default. The
-/// pass is suppressed when the option is false. When the option is true, the
-/// StandardID is selected if the target provides no default.
-static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
- cl::boolOrDefault Override,
- AnalysisID StandardID) {
- switch (Override) {
- case cl::BOU_UNSET:
- return TargetID;
- case cl::BOU_TRUE:
- if (TargetID.isValid())
- return TargetID;
- if (StandardID == nullptr)
- report_fatal_error("Target cannot enable pass");
- return StandardID;
- case cl::BOU_FALSE:
- return IdentifyingPassPtr();
- }
- llvm_unreachable("Invalid command line option state");
-}
-
/// Allow standard passes to be disabled by the command line, regardless of who
/// is adding the pass.
///
@@ -182,9 +156,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
if (StandardID == &MachineCSEID)
return applyDisable(TargetID, DisableMachineCSE);
- if (StandardID == &MachineSchedulerID)
- return applyOverride(TargetID, EnableMachineSched, StandardID);
-
if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
return applyDisable(TargetID, DisablePostRAMachineLICM);
@@ -249,11 +220,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
-
- // Temporarily disable experimental passes.
- const TargetSubtargetInfo &ST = *TM->getSubtargetImpl();
- if (!ST.useMachineScheduler())
- disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
@@ -409,10 +375,8 @@ void TargetPassConfig::addIRPasses() {
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
- if (!DisableVerify) {
+ if (!DisableVerify)
addPass(createVerifierPass());
- addPass(createDebugInfoVerifierPass());
- }
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
@@ -455,7 +419,11 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
addPass(createWinEHPass(TM));
+ addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -479,12 +447,6 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
- // Need to verify DebugInfo *before* creating the stack protector analysis.
- // It's a function pass, and verifying between it and its users causes a
- // crash.
- if (!DisableVerify)
- addPass(createDebugInfoVerifierPass());
-
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
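[The WinEH change above relies on each preparation pass recognizing the personality function before doing any work, so both passes can be scheduled unconditionally. A toy sketch of that dispatch pattern; the two personality names are illustrative examples, not the full set either pass accepts.]

    #include <cstdio>
    #include <string>
    #include <vector>

    // Each preparation pass bails out unless it recognizes the personality,
    // so scheduling both GCC-style and MSVC-style passes is safe.
    struct PrepPass {
      const char *Name;
      bool (*Recognizes)(const std::string &);
      void run(const std::string &Personality) const {
        if (!Recognizes(Personality)) {
          std::printf("%s: personality not mine, skipping\n", Name);
          return;
        }
        std::printf("%s: preparing EH for %s\n", Name, Personality.c_str());
      }
    };

    static bool isGCCStyle(const std::string &P) {
      return P == "__gxx_personality_v0"; // illustrative, not the full set
    }
    static bool isMSVCStyle(const std::string &P) {
      return P == "__CxxFrameHandler3"; // illustrative, not the full set
    }

    int main() {
      std::vector<PrepPass> Pipeline = {{"WinEHPrepare", isMSVCStyle},
                                        {"DwarfEHPrepare", isGCCStyle}};
      for (const PrepPass &P : Pipeline)
        P.run("__gxx_personality_v0"); // only DwarfEHPrepare does real work
    }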
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 283d1f2..ebe05e3 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (ExtendLife && !ExtendedUses.empty())
// Extend the liveness of the extension result.
- std::copy(ExtendedUses.begin(), ExtendedUses.end(),
- std::back_inserter(Uses));
+ Uses.append(ExtendedUses.begin(), ExtendedUses.end());
// Now replace all uses.
bool Changed = false;
@@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
// => v0 = COPY v1
// Currently we haven't seen motivating example for that and we
// want to avoid untested code.
- NumRewrittenCopies += Changed == true;
+ NumRewrittenCopies += Changed;
return Changed;
}
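[Both PeepholeOptimizer cleanups above are behavior-preserving simplifications: a single append call replaces the copy-through-back_inserter idiom, and a bool already adds as 0 or 1. The same two idioms with standard containers:]

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Uses = {1, 2};
      const std::vector<int> ExtendedUses = {3, 4};
      // Equivalent of SmallVector::append: one call, no std::back_inserter.
      Uses.insert(Uses.end(), ExtendedUses.begin(), ExtendedUses.end());

      unsigned NumRewrittenCopies = 0;
      bool Changed = true;
      NumRewrittenCopies += Changed; // the "== true" comparison added nothing
      std::printf("%zu uses, %u rewrites\n", Uses.size(), NumRewrittenCopies);
    }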
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 6d29b98..e073e6a 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "PrologEpilogInserter.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -48,6 +48,53 @@ using namespace llvm;
#define DEBUG_TYPE "pei"
+namespace {
+class PEI : public MachineFunctionPass {
+public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock *EntryBlock;
+ SmallVector<MachineBasicBlock *, 4> ReturnBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateSets(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock *MBB);
+};
+} // namespace
+
char PEI::ID = 0;
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
@@ -810,17 +857,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
continue;
}
- // Frame allocations are target independent. Simply swap the index with
- // the offset.
- if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) {
- assert(TFI->hasFP(Fn) && "frame alloc requires FP");
- MachineOperand &FI = MI->getOperand(i);
- unsigned Reg;
- int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg);
- FI.ChangeToImmediate(FrameOffset);
- continue;
- }
-
// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
// to insert more than one instruction. We need the register
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
deleted file mode 100644
index f88b8ef..0000000
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is responsible for finalizing the functions frame layout, saving
-// callee saved registers, and for emitting prolog & epilog code for the
-// function.
-//
-// This pass must be run after register allocation. After this pass is
-// executed, it is illegal to construct MO_FrameIndex operands.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
- class RegScavenger;
- class MachineBasicBlock;
-
- class PEI : public MachineFunctionPass {
- public:
- static char ID;
- PEI() : MachineFunctionPass(ID) {
- initializePEIPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
- /// frame indexes with appropriate references.
- ///
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- private:
- RegScavenger *RS;
-
- // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
- // stack frame indexes.
- unsigned MinCSFrameIndex, MaxCSFrameIndex;
-
- // Entry and return blocks of the current function.
- MachineBasicBlock* EntryBlock;
- SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
-
- // Flag to control whether to use the register scavenger to resolve
- // frame index materialization registers. Set according to
- // TRI->requiresFrameIndexScavenging() for the curren function.
- bool FrameIndexVirtualScavenging;
-
- void calculateSets(MachineFunction &Fn);
- void calculateCallsInformation(MachineFunction &Fn);
- void calculateCalleeSavedRegisters(MachineFunction &Fn);
- void insertCSRSpillsAndRestores(MachineFunction &Fn);
- void calculateFrameObjectOffsets(MachineFunction &Fn);
- void replaceFrameIndices(MachineFunction &Fn);
- void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
- int &SPAdj);
- void scavengeFrameVirtualRegs(MachineFunction &Fn);
- void insertPrologEpilogCode(MachineFunction &Fn);
-
- // Convenience for recognizing return blocks.
- bool isReturnBlock(MachineBasicBlock* MBB);
- };
-} // End llvm namespace
-#endif
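[Deleting the header works because PEI had exactly one client translation unit; moving the class into an anonymous namespace gives it internal linkage. A generic sketch of that pattern, independent of LLVM's pass machinery:]

    #include <cstdio>

    namespace {
    // Internal linkage: the type is invisible outside this translation unit,
    // so no header needs to exist and no other file can depend on it.
    class FileLocalPass {
    public:
      bool runOnFunction(const char *Name) {
        std::printf("running on %s\n", Name);
        return false; // made no change
      }
    };
    } // namespace

    int main() {
      FileLocalPass P;
      P.runOnFunction("f");
    }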
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 6b346f4..16ff48e 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -27,6 +27,7 @@
#endif
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index edc3294..e94f1bb 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1554,7 +1554,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
- const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(CurRC, *MF);
unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
// Split around every non-copy instruction if this split will relax
// the constraints on the virtual register.
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 77a42b3..eeff73d 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -178,8 +178,40 @@ class Interference : public PBQPRAConstraint {
private:
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
- typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
+ typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef DenseSet<IKey> DisjointAllowedRegsCache;
+ typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
+ typedef DenseSet<IEdgeKey> IEdgeCache;
+
+ bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ const DisjointAllowedRegsCache &D) const {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ if (NRegs == MRegs)
+ return false;
+
+ if (NRegs < MRegs)
+ return D.count(IKey(NRegs, MRegs)) > 0;
+
+ return D.count(IKey(MRegs, NRegs)) > 0;
+ }
+
+ void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ DisjointAllowedRegsCache &D) {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ assert(NRegs != MRegs && "AllowedRegs cannot be disjoint with itself");
+
+ if (NRegs < MRegs)
+ D.insert(IKey(NRegs, MRegs));
+ else
+ D.insert(IKey(MRegs, NRegs));
+ }
// Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
// for the fast interference graph construction algorithm. The last is there
@@ -247,6 +279,13 @@ public:
// and uniquing them.
IMatrixCache C;
+ // Finding an edge is expensive in the worst case (O(max_clique(G))), so
+ // cache edges we have already seen locally.
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed register pairs.
+ DisjointAllowedRegsCache D;
+
typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
decltype(&lowestStartPoint)> IntervalQueue;
@@ -290,14 +329,21 @@ public:
for (const auto &A : Active) {
PBQP::GraphBase::NodeId MId = getNodeId(A);
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
// Check that we haven't already added this edge
- // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
- // It might be better to replace this with a local bit-matrix.
- if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
continue;
// This is a new edge - add it to the graph.
- createInterferenceEdge(G, NId, MId, C);
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
}
// Finally, add Cur to the Active set.
@@ -307,35 +353,48 @@ public:
private:
- void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
- PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+ // Create an interference edge and add it to the graph, unless it is
+ // a null matrix, meaning the nodes' allowed registers do not have any
+ // interference. This case occurs frequently between integer and floating
+ // point registers, for example.
+ // Returns true iff both nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
-
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
// Try looking the edge costs up in the IMatrixCache first.
- IMatrixKey K(&NRegs, &MRegs);
+ IKey K(&NRegs, &MRegs);
IMatrixCache::iterator I = C.find(K);
if (I != C.end()) {
G.addEdgeBypassingCostAllocator(NId, MId, I->second);
- return;
+ return true;
}
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
for (unsigned I = 0; I != NRegs.size(); ++I) {
unsigned PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
unsigned PRegM = MRegs[J];
- if (TRI.regsOverlap(PRegN, PRegM))
+ if (TRI.regsOverlap(PRegN, PRegM)) {
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
}
}
+ if (!NodesInterfere)
+ return false;
+
PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
}
};
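[The DisjointAllowedRegsCache above stores an unordered pair of pointers by always inserting the (min, max) ordering, so lookups succeed regardless of argument order. A standalone sketch of the normalization with a std::set; DenseSet behaves the same for this purpose.]

    #include <cstdio>
    #include <set>
    #include <utility>

    using Key = std::pair<const void *, const void *>;

    // Normalize so (A, B) and (B, A) name the same cache entry; the raw
    // pointer comparison mirrors the NRegs < MRegs trick in the patch.
    static Key makeKey(const void *A, const void *B) {
      return A < B ? Key(A, B) : Key(B, A);
    }

    int main() {
      int X = 0, Y = 0;
      std::set<Key> Disjoint;
      Disjoint.insert(makeKey(&X, &Y));
      // Lookup succeeds regardless of argument order.
      std::printf("cached: %d\n", (int)Disjoint.count(makeKey(&Y, &X))); // 1
    }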
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index ab33672..178fa18 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.NumRegs = StressRA;
// Check if RC is a proper sub-class.
- if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (const TargetRegisterClass *Super =
+ TRI->getLargestLegalSuperClass(RC, *MF))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
@@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
}
compute(RC);
unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
- return TRI->getRegPressureSetLimit(Idx)
- - TRI->getRegClassWeight(RC).RegWeight * NReserved;
+ return TRI->getRegPressureSetLimit(*MF, Idx) -
+ TRI->getRegClassWeight(RC).RegWeight * NReserved;
}
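[The pressure-set limit computed above is the target's raw limit minus the weight that reserved, never-allocatable registers can never contribute. A worked instance; all numbers are assumed for illustration, not from any target.]

    #include <cstdio>

    int main() {
      unsigned RawLimit = 24;    // TRI->getRegPressureSetLimit(*MF, Idx)
      unsigned NumRegs = 16;     // RC->getNumRegs()
      unsigned Allocatable = 13; // getNumAllocatableRegs(RC)
      unsigned RegWeight = 2;    // TRI->getRegClassWeight(RC).RegWeight
      unsigned NReserved = NumRegs - Allocatable;        // 3 reserved registers
      unsigned Limit = RawLimit - RegWeight * NReserved; // 24 - 2*3 = 18
      std::printf("effective pressure limit: %u\n", Limit);
    }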
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1e4cfe8..9e3cf41 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -58,6 +58,10 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(false));
+
/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
@@ -160,12 +164,14 @@ namespace {
/// LaneMask are split as necessary. @p LaneMask are the lanes that
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
- void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ /// @returns false if live range conflicts couldn't be resolved.
+ bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
- void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ /// @returns false if live range conflicts couldn't be resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask, const CoalescerPair &CP);
/// We found a non-trivially-coalescable copy. If the source value number is
@@ -204,6 +210,20 @@ namespace {
/// Returns true if @p CopyMI was a copy of an undef value and eliminated.
bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists in checking that at least one of
+ /// interfering node, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst will not be able to be both coalesced
+ /// with Src. Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
public:
static char ID; ///< Class identification, replacement for typeinfo
RegisterCoalescer() : MachineFunctionPass(ID) {
@@ -1143,7 +1163,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) {
+ if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
@@ -1756,6 +1776,9 @@ public:
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs);
+ /// Remove liverange defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
};
@@ -1856,7 +1879,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.ValidLanes = V.WriteLanes = 1;
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
} else {
bool Redef = false;
V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -2339,6 +2366,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
LI.removeEmptySubRanges();
}
+void JoinVals::removeImplicitDefs() {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+ continue;
+
+ VNInfo *VNI = LR.getValNumInfo(i);
+ VNI->markUnused();
+ LR.removeValNo(VNI);
+ }
+}
+
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
@@ -2382,7 +2421,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
-void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask,
const CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
@@ -2392,12 +2431,19 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
NewVNInfo, CP, LIS, TRI, true, true);
// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
- // Conflicts should already be resolved so the mapping/resolution should
- // always succeed.
- if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
- if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
+ // We should be able to resolve all conflicts here as we could successfully do
+ // it on the main range already. There is, however, a problem when multiple
+ // ranges get mapped to the "overflow" lane mask bit, which creates unexpected
+ // interference.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
+ if (!LHSVals.resolveConflicts(RHSVals) ||
+ !RHSVals.resolveConflicts(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
// The merging algorithm in LiveInterval::join() can't handle conflicting
// value mappings, so we need to remove any live ranges that overlap a
@@ -2407,6 +2453,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
LHSVals.pruneValues(RHSVals, EndPoints, false);
RHSVals.pruneValues(LHSVals, EndPoints, false);
+ LHSVals.removeImplicitDefs();
+ RHSVals.removeImplicitDefs();
+
LRange.verify();
RRange.verify();
@@ -2416,16 +2465,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
if (EndPoints.empty())
- return;
+ return true;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LRange << '\n');
LIS->extendToIndices(LRange, EndPoints);
+ return true;
}
-void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
@@ -2453,7 +2503,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
CommonRange = &R;
}
LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+ if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
+ return false;
LaneMask &= ~RMask;
}
@@ -2461,13 +2512,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
+ return true;
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
- bool TrackSubRegLiveness = MRI->tracksSubRegLiveness();
+ bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
TRI, false, TrackSubRegLiveness);
JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
@@ -2511,22 +2563,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
+ bool Abort = false;
if (!RHS.hasSubRanges()) {
unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
+ Abort = true;
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
+ Abort = true;
+ break;
+ }
}
}
+ if (Abort) {
+ // This shouldn't have happened :-(
+ // However, we are aware of at least one existing problem where we
+ // can't merge subranges when multiple ranges end up in the
+ // "overflow bit" 32. As a workaround we drop all subregister ranges,
+ // which means we lose some precision but are back to a well-defined
+ // state.
+ assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+ && "SubRange merge should only fail when merging into bit 32.");
+ DEBUG(dbgs() << "\tSubrange join aborted!\n");
+ LHS.clearSubRanges();
+ RHS.clearSubRanges();
+ } else {
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
-
- LHSVals.pruneSubRegValues(LHS, ShrinkMask);
- RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2645,6 +2715,58 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
return Progress;
}
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+ const MachineRegisterInfo *MRI) {
+ assert(Copy.isCopyLike());
+ // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+ if (&MI != &Copy && MI.isCopyLike())
+ return false;
+ return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+ assert(Copy.isCopyLike());
+ if (!UseTerminalRule)
+ return false;
+ // Check if the destination of this copy has any other affinity.
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !isTerminalReg(DstReg, Copy, MRI))
+ return false;
+
+ // DstReg is a terminal node. Check if it interferes with any other
+ // copy involving SrcReg.
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ const MachineBasicBlock *OrigBB = Copy.getParent();
+ const LiveInterval &DstLI = LIS->getInterval(DstReg);
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+ // Technically we should check if the weight of the new copy is
+ // interesting compared to the other one and update the weight
+ // of the copies accordingly. However, this would only work if
+ // we would gather all the copies first then coalesce, whereas
+ // right now we interleave both actions.
+ // For now, just consider the copies that are in the same block.
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+ continue;
+ unsigned OtherReg = MI.getOperand(0).getReg();
+ if (OtherReg == SrcReg)
+ OtherReg = MI.getOperand(1).getReg();
+ // Check if OtherReg is a non-terminal.
+ if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ isTerminalReg(OtherReg, MI, MRI))
+ continue;
+ // Check that OtherReg interferes with DstReg.
+ if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+ DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
DEBUG(dbgs() << MBB->getName() << ":\n");
@@ -2659,7 +2781,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// cmp+jmp macro fusion.
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII) {
- if (!MII->isCopyLike())
+ if (!MII->isCopyLike() || applyTerminalRule(*MII))
continue;
if (isLocalCopy(&(*MII), LIS))
LocalWorkList.push_back(&(*MII));
@@ -2670,7 +2792,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
else {
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII)
- if (MII->isCopyLike())
+ if (MII->isCopyLike() && !applyTerminalRule(*MII))
WorkList.push_back(MII);
}
// Try coalescing the collected copies immediately, and remove the nulls.
@@ -2741,7 +2863,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
- JoinGlobalCopies = STI.useMachineScheduler();
+ JoinGlobalCopies = STI.enableJoinGlobalCopies();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
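[The terminal rule documented above is easiest to see on a toy affinity/interference model: a register is terminal if it appears in exactly one copy, and its copy is dropped when another copy of the same source involves a non-terminal register that interferes with it. A self-contained sketch with plain ints as registers, not LLVM API:]

    #include <algorithm>
    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    // Toy model: a copy is an affinity edge (Dst, Src); interference is a
    // symmetric relation stored as normalized (min, max) pairs.
    struct Copy { int Dst, Src; };

    static bool isTerminal(int Reg, const Copy &Self,
                           const std::vector<Copy> &Copies) {
      // Terminal: Reg participates in no copy other than Self.
      for (const Copy &C : Copies)
        if (&C != &Self && (C.Dst == Reg || C.Src == Reg))
          return false;
      return true;
    }

    static bool applyTerminalRule(const Copy &C, const std::vector<Copy> &Copies,
                                  const std::set<std::pair<int, int>> &Interfere) {
      if (!isTerminal(C.Dst, C, Copies))
        return false;
      // Dst is terminal; skip coalescing if some other copy of Src involves
      // a non-terminal register that interferes with Dst.
      for (const Copy &Other : Copies) {
        if (&Other == &C || (Other.Dst != C.Src && Other.Src != C.Src))
          continue;
        int OtherReg = Other.Dst == C.Src ? Other.Src : Other.Dst;
        if (isTerminal(OtherReg, Other, Copies))
          continue;
        if (Interfere.count({std::min(C.Dst, OtherReg), std::max(C.Dst, OtherReg)}))
          return true; // Other exposes more opportunities; drop C.
      }
      return false;
    }

    int main() {
      std::vector<Copy> Copies = {{1, 2}, {3, 2}, {3, 4}}; // reg1=reg2 is terminal
      std::set<std::pair<int, int>> Interfere = {{1, 3}};
      std::printf("drop copy reg1=reg2? %d\n",
                  applyTerminalRule(Copies[0], Copies, Interfere)); // prints 1
    }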
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 9925efb..3634103 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
}
+namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
///
@@ -354,6 +355,7 @@ protected:
}
}
};
+} // namespace
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 78bfd23..17dd729 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -96,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
- SmallVectorImpl<Value *> &Objects) {
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
@@ -132,7 +133,8 @@ UnderlyingObjectsVector;
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
- UnderlyingObjectsVector &Objects) {
+ UnderlyingObjectsVector &Objects,
+ const DataLayout &DL) {
if (!MI->hasOneMemOperand() ||
(!(*MI->memoperands_begin())->getValue() &&
!(*MI->memoperands_begin())->getPseudoValue()) ||
@@ -156,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
return;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
+ getUnderlyingObjects(V, Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
@@ -468,7 +470,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
// This MI might have either incomplete info, or known to be unsafe
// to deal with (i.e. volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
- const MachineFrameInfo *MFI) {
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL) {
if (!MI || MI->memoperands_empty())
return true;
// We purposefully do no check for hasOneMemOperand() here
@@ -491,7 +494,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
return true;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
+ getUnderlyingObjects(V, Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
IE = Objs.end(); I != IE; ++I) {
// Does this pointer refer to a distinct and identifiable object?
@@ -508,7 +511,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
/// these two MIs be reordered during scheduling from memory dependency
/// point of view.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- MachineInstr *MIa,
+ const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -527,7 +530,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
return true;
- if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
return true;
// If we are dealing with two "normal" loads, we do not need an edge
@@ -579,10 +582,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// This recursive function iterates over chain deps of SUb looking for
/// "latest" node that needs a chain edge to SUa.
-static unsigned
-iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
- SmallPtrSetImpl<const SUnit*> &Visited) {
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+ SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSetImpl<const SUnit *> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
@@ -607,7 +610,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
// add that edge to the predecessors chain of SUb,
// and stop descending.
if (*Depth > 200 ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
@@ -617,7 +620,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
I != E; ++I)
if (I->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
return *Depth;
}
@@ -626,7 +629,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// checks whether SU can be aliasing any node dominated
/// by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+ std::set<SUnit *> &CheckList,
unsigned LatencyToLoad) {
if (!SU)
return;
@@ -638,7 +642,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
I != IE; ++I) {
if (SU == *I)
continue;
- if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
SDep Dep(SU, SDep::MayAliasMem);
Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
(*I)->addPred(Dep);
@@ -649,22 +653,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
JE = (*I)->Succs.end(); J != JE; ++J)
if (J->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SU, J->getSUnit(),
- ExitSU, &Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+ Visited);
}
}
/// Check whether two objects need a chain edge, if so, add it
/// otherwise remember the rejected SU.
-static inline
-void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb,
- std::set<SUnit *> &RejectList,
- unsigned TrueMemOrderLatency = 0,
- bool isNormalMemory = false) {
+static inline void addChainDependency(AliasAnalysis *AA,
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa,
+ SUnit *SUb, std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but remember the rejected node.
- if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
@@ -883,7 +887,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
@@ -896,25 +900,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
- ChainLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes, ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- TrueMemOrderLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
@@ -928,7 +934,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPred(SDep(SU, SDep::Barrier));
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// Treat all other stores conservatively.
@@ -952,8 +958,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
// If we're not using AA, then we only need one store per object.
if (!AAForDep)
@@ -977,7 +983,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
@@ -986,13 +993,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
}
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
@@ -1000,7 +1009,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// A load with no underlying object. Depend on all
@@ -1008,8 +1017,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
@@ -1032,18 +1041,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
@@ -1211,7 +1222,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
+ SU->getInstr()->print(oss, /*SkipOpers=*/true);
return oss.str();
}
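[The mechanical part of the ScheduleDAGInstrs change is threading a DataLayout reference down the helper chain instead of reaching for it at the leaves. A generic sketch of that dependency-injection shape; all names are illustrative.]

    #include <cstdio>
    #include <vector>

    // Stand-in for llvm::DataLayout; only the fact that it is passed down
    // matters for the sketch.
    struct DataLayout { unsigned PointerBytes; };

    // After the change, every helper takes DL explicitly, so the chain
    // analyzes against the module's actual layout rather than a global.
    static void getUnderlyingObjects(int V, std::vector<int> &Objs,
                                     const DataLayout &DL) {
      (void)DL; // the real walk uses DL to fold ptrtoint/inttoptr arithmetic
      Objs.push_back(V);
    }

    static bool needChainEdge(int A, int B, const DataLayout &DL) {
      std::vector<int> ObjsA, ObjsB;
      getUnderlyingObjects(A, ObjsA, DL);
      getUnderlyingObjects(B, ObjsB, DL);
      return ObjsA == ObjsB; // same underlying object: keep memory ordering
    }

    int main() {
      DataLayout DL{8};
      std::printf("chain edge? %d\n", needChainEdge(1, 1, DL)); // prints 1
    }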
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6129401..a1c84c5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -246,10 +246,11 @@ namespace {
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
- SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -302,6 +303,7 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
@@ -713,6 +715,22 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
return nullptr;
}
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// int.
static ConstantSDNode *isConstOrConstSplat(SDValue N) {
@@ -1180,11 +1198,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
- // Early exit if this basic block is in an optnone function.
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::OptimizeNone))
- return;
-
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
@@ -1369,6 +1382,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSTORE: return visitMSTORE(N);
@@ -2685,6 +2699,109 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ }
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+ cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+ (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ cast<ConstantSDNode>(RR)->isNullValue()))) {
+ SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+ LL, DAG.getConstant(1, LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(LocReference, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
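[The visitANDLike split above keeps the operand-only rules in a helper so visitSELECT can reuse them when a select reduces to an and. A toy sketch of the refactor's shape with stand-in types, not SelectionDAG API:]

    #include <cstdio>

    // Stand-ins; in the real code these are SDValues and SDNodes.
    static const int kUndef = -1;
    struct Value { int Id; };

    // Operand-only rules live in the *Like helper so both callers share them.
    static bool combineANDLike(Value N0, Value N1) {
      // Mirrors the first rule moved above: fold (and x, undef) -> 0.
      return N0.Id == kUndef || N1.Id == kUndef;
    }

    static bool visitAND(Value N0, Value N1) { return combineANDLike(N0, N1); }

    static bool visitSELECT(Value /*Cond*/, Value T, Value F) {
      // A select whose operands form an and-like pattern reuses the rules.
      return combineANDLike(T, F);
    }

    int main() {
      std::printf("%d %d\n", visitAND(Value{1}, Value{kUndef}),
                  visitSELECT(Value{1}, Value{kUndef}, Value{2}));
    }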
@@ -2716,9 +2833,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N0;
}
- // fold (and x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -2808,9 +2922,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (SplatBitSize % BitWidth == 0) {
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
}
}
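[The new SplatBitSize guard matters because collapsing a splat into one BitWidth-sized AND mask by folding BitWidth-sized slices is only sound when the slices tile the splat exactly. A toy model with assumed widths:]

    #include <cstdint>
    #include <cstdio>

    // Returns false when the slices would misalign, in which case the caller
    // must keep the conservative all-ones mask, as the patched code does.
    static bool foldSplat(uint64_t Splat, unsigned SplatBits, unsigned BitWidth,
                          uint64_t &Constant) {
      if (SplatBits % BitWidth != 0)
        return false;
      Constant = ~0ull >> (64 - BitWidth);
      for (unsigned i = 0; i < SplatBits / BitWidth; ++i)
        Constant &= (Splat >> (i * BitWidth)) & (~0ull >> (64 - BitWidth));
      return true;
    }

    int main() {
      uint64_t C = 0;
      std::printf("32 vs 16 ok? %d\n", foldSplat(0xABCDABCDull, 32, 16, C)); // 1
      std::printf("24 vs 16 ok? %d\n", foldSplat(0xABCDEFull, 24, 16, C));   // 0
    }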
@@ -2863,118 +2981,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
- cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
- (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
- // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
-
- // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
- // fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
- // fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
@@ -3046,33 +3052,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
- if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- APInt ADDC = ADDI->getAPIntValue();
- if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
- // immediate for an add, but it is legal if its top c2 bits are set,
- // transform the ADD so the immediate doesn't need to be materialized
- // in a register.
- if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- SRLI->getZExtValue());
- if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
- ADDC |= Mask;
- if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
- CombineTo(N0.getNode(), NewAdd);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- }
- }
- }
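+  // These folds now live in visitANDLike so that visitSELECT can reuse
+  // them when it rewrites nested selects into a single AND of conditions.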
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
}
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -3338,6 +3371,98 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value. \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
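+  // e.g. with C1 = 0xFF00 and C2 = 0x00FF:
+  //   (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF),
+  // valid when X's low byte and Y's high byte are known zero.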
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3425,12 +3550,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
- }
// fold (or c1, c2) -> c1|c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -3449,6 +3568,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
if (BSwap.getNode())
@@ -3474,91 +3596,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
}
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
- // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(1).getOpcode() == ISD::Constant &&
- N1.getOperand(1).getOpcode() == ISD::Constant &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- // We can only do this xform if we know that bits from X that are set in C2
- // but not in C1 are already zero. Likewise for Y.
- const APInt &LHSMask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- const APInt &RHSMask =
- cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
- if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
- DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
- }
- }
-
- // (or (and X, M), (and X, N)) -> (and X, (or M, N))
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(0) == N1.getOperand(0) &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
- }
-
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
@@ -3947,6 +3990,32 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()))
+ if (N0.getOpcode() == ISD::SHL)
+ if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ if (N1C->isAllOnesValue() && ShlLHS->isOne())
+ return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT),
+ N0.getOperand(1));
+
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
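The rotl fold above is easy to sanity-check outside the DAG. A minimal
standalone sketch of the identity it relies on, using a hand-rolled 16-bit
rotate rather than any LLVM API:

#include <cassert>
#include <cstdint>

static uint16_t rotl16(uint16_t V, unsigned S) {
  S &= 15;                                    // shift amounts wrap at 16 bits
  return S ? (uint16_t)((V << S) | (V >> (16 - S))) : V;
}

int main() {
  // ~(1 << x) == rotl(~1, x) for every in-range shift amount.
  for (unsigned X = 0; X != 16; ++X)
    assert((uint16_t)~(1u << X) == rotl16((uint16_t)~1u, X));
}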
@@ -4792,6 +4861,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
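The four rewrites above are mutual inverses, keyed off
shouldNormalizeToSelectSequence(). A scalar sketch of the equivalence they
rely on (illustrative names, not LLVM code):

int sel_and(bool C0, bool C1, int X, int Y) {
  return (C0 && C1) ? X : Y;        // select (and C0, C1), X, Y
}
int sel_nested(bool C0, bool C1, int X, int Y) {
  return C0 ? (C1 ? X : Y) : Y;     // select C0, (select C1, X, Y), Y
}
// Both return X only when C0 and C1 are both true, and Y otherwise,
// so the DAG forms can be rewritten in either direction.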
@@ -6440,7 +6572,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -7453,14 +7585,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
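+    // (With a constant N00, the result (fmul N00, MulConsts) need not be
+    //  constant-folded away, e.g. for non-splat vector constants, so it
+    //  could keep re-matching this pattern.)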
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDLoc SL(N);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts);
+ }
}
}
@@ -7821,8 +7962,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7874,8 +8014,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -8033,7 +8172,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
@@ -8042,7 +8180,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -8117,14 +8255,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// Constant fold FNEG.
- if (isa<ConstantFPSDNode>(N0))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
@@ -8219,13 +8352,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// fold (fabs c1) -> fabs(c1)
- if (isa<ConstantFPSDNode>(N0))
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -8941,7 +9069,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), Align, LD->getAAInfo());
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
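+        // The rebuilt load may turn out to be N itself, in which case
+        // CombineTo would make no progress; only substitute a new node.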
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
@@ -9106,9 +9235,6 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- LoadedSlice(const LoadedSlice &LS)
- : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
-
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
@@ -9855,6 +9981,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
@@ -9942,6 +10069,7 @@ struct BaseIndexOffset {
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
}
};
+} // namespace
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
@@ -10575,11 +10703,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, SDLoc(N), Value,
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
}
}
@@ -11226,12 +11358,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- SDValue V = reduceBuildVecExtToExtBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- V = reduceBuildVecConvertToConvertBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -11352,7 +11482,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
- if (VecIn2.getNode())
+      // If the zero vector was used, we cannot split the vector,
+ // since we'd need 3 inputs.
+ if (UsesZeroVector || VecIn2.getNode())
return SDValue();
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@@ -11364,7 +11496,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
- UsesZeroVector = false;
} else
return SDValue();
}
@@ -11465,14 +11596,12 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
unsigned NumElts = OpVT.getVectorNumElements();
if (ISD::UNDEF == Op.getOpcode())
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(DAG.getUNDEF(MinVT));
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
if (SVT.isFloatingPoint()) {
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(Op.getOperand(i));
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
} else {
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(
@@ -11850,7 +11979,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (V->getValueType(0) != VT)
- NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
return NewBV;
}
}
@@ -11872,6 +12001,81 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ // Peek through the bitcast only if there is one user.
+ SDValue BC0 = N0;
+ while (BC0.getOpcode() == ISD::BITCAST) {
+ if (!BC0.hasOneUse())
+ break;
+ BC0 = BC0.getOperand(0);
+ }
+
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
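+    // e.g. scaling mask <1, -1> by 2 gives <2, 3, -1, -1>: each wide lane
+    // expands to Scale consecutive narrow lanes.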
+
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
+ SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
+ return DAG.getNode(
+ ISD::BITCAST, SDLoc(N), VT,
+ DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
@@ -11981,16 +12185,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Avoid introducing shuffles with illegal mask.
if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- // Compute the commuted shuffle mask and test again.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ ShuffleVectorSDNode::commuteMask(Mask);
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
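commuteMask replaces several hand-rolled loops like the one deleted above
(another pair goes away in SelectionDAG.cpp below). A self-contained sketch
of its effect on a mask, with std::vector standing in for the in-tree mask
storage:

#include <vector>

static void commuteMask(std::vector<int> &Mask) {
  int NumElts = (int)Mask.size();
  for (int &M : Mask) {
    if (M < 0)
      continue;                                  // undef lanes stay undef
    M = M < NumElts ? M + NumElts : M - NumElts; // swap which input feeds the lane
  }
}
// e.g. for two 4-element inputs, <0, 5, -1, 3> becomes <4, 1, -1, 7>.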
@@ -12010,6 +12205,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE.
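+  // e.g. with <4 x i32> V: SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, 2))
+  // becomes VECTOR_SHUFFLE(V, undef, <2, -1, -1, -1>).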
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+
+ // FIXME: We could support implicit truncation if the shuffle can be
+ // scaled to a smaller vector scalar type.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+ if (C0 && VT == InVec.getValueType() &&
+ VT.getScalarType() == InVal.getValueType()) {
+ SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+
+ if (TLI.isShuffleMaskLegal(NewMask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+ NewMask);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N2 = N->getOperand(2);
@@ -12043,44 +12266,51 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
- SDLoc dl(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (!isa<ConstantSDNode>(Elt))
- return SDValue();
+ SDLoc dl(N);
- if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
- Indices.push_back(i);
- else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts+i);
- else
- return SDValue();
- }
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
+
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
- // Let's see if the target supports this vector_shuffle and make sure
- // we're not running after operation legalization where it may have
- // custom lowered the vector shuffles.
- EVT RVT = RHS.getValueType();
- if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts+i);
+ else
+ return SDValue();
}
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
return SDValue();
@@ -12093,8 +12323,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Shuffle = XformToShuffleWithZero(N);
- if (Shuffle.getNode()) return Shuffle;
+
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
@@ -12172,38 +12403,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// Visit a binary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVUnaryOp only works on vectors!");
-
- SDValue N0 = N->getOperand(0);
-
- if (N0.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
-
- // Operand is a BUILD_VECTOR node, see if we can constant fold it.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() != N0.getNumOperands())
- return SDValue();
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 1df4a1d..223a149 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -62,6 +62,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -497,7 +498,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@@ -518,8 +519,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
- TotalOffs +=
- DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
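The sextOrTrunc change above guards against GEP indices wider than 64 bits;
APInt::getSExtValue asserts when the value does not fit. A minimal sketch,
assuming only the documented APInt interface:

APInt Big(128, 1);                                  // e.g. an i128 GEP index
uint64_t IdxN = Big.sextOrTrunc(64).getSExtValue(); // safe: truncates first
// Calling Big.getSExtValue() directly would assert for any value that
// needs more than 64 bits.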
@@ -801,7 +802,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return false;
// Push the register mask info.
- Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC)));
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
// Add scratch registers as implicit def and early clobber.
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 7e72dc6..291b583 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61c0a6f..ece38f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1442,13 +1442,27 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ SDValue NewLoad;
+
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(),
- false, false, false, 0);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ NewLoad = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(), false, false, false, 0);
+
+  // Replace the chain going out of the store with the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+  // We introduced a cycle, though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
}
SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
@@ -2817,132 +2831,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
- }
- break;
- }
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
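Both this hunk and the matching one in LegalizeIntegerTypes.cpp below now
defer to RTLIB::getATOMIC. A sketch of how such a helper can fold the deleted
switches into one table; the OP_TO_LIBCALL macro and the function name here
are assumptions, only the opcode/width mapping comes from the code above:

#define OP_TO_LIBCALL(Name, Enum)                                             \
  case Name:                                                                  \
    switch (VT.SimpleTy) {                                                    \
    default:        return RTLIB::UNKNOWN_LIBCALL;                            \
    case MVT::i8:   return Enum##_1;                                          \
    case MVT::i16:  return Enum##_2;                                          \
    case MVT::i32:  return Enum##_4;                                          \
    case MVT::i64:  return Enum##_8;                                          \
    case MVT::i128: return Enum##_16;                                         \
    }

RTLIB::Libcall getATOMICSketch(unsigned Opcode, MVT VT) {
  switch (Opcode) {
  OP_TO_LIBCALL(ISD::ATOMIC_SWAP, RTLIB::SYNC_LOCK_TEST_AND_SET)
  OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, RTLIB::SYNC_VAL_COMPARE_AND_SWAP)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, RTLIB::SYNC_FETCH_AND_ADD)
  // ...the remaining ATOMIC_LOAD_* opcodes follow the same pattern...
  }
  return RTLIB::UNKNOWN_LIBCALL;
}
#undef OP_TO_LIBCALL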
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5507c70..25e80b9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1116,7 +1116,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
- assert(OpNo == 2 && "Only know how to promote the mask!");
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
@@ -1127,7 +1126,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ if (!TLI.isTypeLegal(MaskVT))
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
else {
@@ -1323,92 +1323,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- }
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
@@ -1417,12 +1333,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- assert(Amt && "Expected zero shifts to be already optimized away.");
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Though Amt shouldn't usually be 0, it's possible, e.g. when legalization
+  // split a vector shift like <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 63671f7..f7e4557 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2553,6 +2553,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(InVT.isVector() && "can not widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+  // In this case, we must split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index db38b76..6303422 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TRI = STI.getRegisterInfo();
TLI = IS->TLI;
TII = STI.getInstrInfo();
- ResourcesModel = TII->CreateTargetScheduleState(STI);
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
   // do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
@@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) {
Queue.pop_back();
}
-
-
-#ifdef NDEBUG
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
- ResourcePriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9466f4d..b52f648 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -196,6 +196,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
return true;
}
+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantFPSDNode or undef.
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
@@ -1446,13 +1462,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
// N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
std::swap(N1, N2);
- int NElts = M.size();
- for (int i = 0; i != NElts; ++i) {
- if (M[i] >= NElts)
- M[i] -= NElts;
- else if (M[i] >= 0)
- M[i] += NElts;
- }
+ ShuffleVectorSDNode::commuteMask(M);
}
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
@@ -1625,19 +1635,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
-
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = SV.getMaskElt(i);
- if (Idx >= 0) {
- if (Idx < (int)NumElems)
- Idx += NumElems;
- else
- Idx -= NumElems;
- }
- MaskVec.push_back(Idx);
- }
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
@@ -2844,7 +2843,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
}
- // Constant fold unary operations with a vector integer operand.
+ // Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
if (BV->isConstant()) {
switch (Opcode) {
@@ -2852,18 +2851,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FP_EXTEND:
+ case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
+ // Let the above scalar folding handle the folding of each element.
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue OpN = BV->getOperand(i);
- // Let the above scalar folding handle the conversion of each
- // element.
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
- OpN);
+ OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN);
+ if (OpN.getOpcode() != ISD::UNDEF &&
+ OpN.getOpcode() != ISD::Constant &&
+ OpN.getOpcode() != ISD::ConstantFP)
+ break;
Ops.push_back(OpN);
}
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ if (Ops.size() == VT.getVectorNumElements())
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
}
}
@@ -5418,17 +5424,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
- // Check to see if there is no change.
- bool AnyChange = false;
- for (unsigned i = 0; i != NumOps; ++i) {
- if (Ops[i] != N->getOperand(i)) {
- AnyChange = true;
- break;
- }
- }
-
- // No operands changed, just return the input node.
- if (!AnyChange) return N;
+  // If no operands changed, just return the input node.
+ if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
@@ -6673,8 +6671,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI->getDataLayout());
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ *TLI->getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 097b618..6c14e79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1016,6 +1016,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
}
}
+/// getCopyFromRegs - If there is a virtual register allocated for the value V,
+/// emit a CopyFromReg of the specified type Ty. Otherwise return an empty
+/// SDValue().
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue res;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
+ Ty);
+ SDValue Chain = DAG.getEntryNode();
+ res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ resolveDanglingDebugInfo(V, res);
+ }
+
+ return res;
+}
+
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
@@ -1026,15 +1044,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If there's a virtual register allocated and initialized for this
// value, use it.
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
- if (It != FuncInfo.ValueMap.end()) {
- unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
- V->getType());
- SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
- resolveDanglingDebugInfo(V, N);
- return N;
+  SDValue CopyFromReg = getCopyFromRegs(V, V->getType());
+  if (CopyFromReg.getNode()) {
+    return CopyFromReg;
}
// Otherwise create a new SDValue and remember it.
@@ -1573,19 +1585,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = BrMBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
@@ -1682,7 +1688,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
- const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
@@ -1705,16 +1711,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
- if (CB.TrueBB == NextBlock) {
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
@@ -1781,19 +1780,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
-
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
- if (JT.MBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
@@ -1922,13 +1914,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
B.Reg, Sub);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
addSuccessorWithWeight(SwitchBB, B.Default);
@@ -1938,7 +1923,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MVT::Other, CopyTo, RangeCmp,
DAG.getBasicBlock(B.Default));
- if (MBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
DAG.getBasicBlock(MBB));
@@ -1991,14 +1977,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
- if (NextMBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
@@ -2027,13 +2007,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I, LandingPad);
break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ break;
}
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
- CopyToExportRegsIfNeeded(&I);
+  // We already took care of the exported value for the statepoint instruction
+  // during the call to LowerStatepoint.
+ if (!isStatepoint(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
// Update successor info
addSuccessorWithWeight(InvokeMBB, Return);
@@ -2128,11 +2115,10 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
+ MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI = CR.CaseBB;
-
if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
+ NextMBB = BBI;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// If any two of the cases has the same destination, and if one value
@@ -2146,8 +2132,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
Case &Big = *(CR.Range.second-1);
if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
- const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
- const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+ const APInt& SmallValue = Small.Low->getValue();
+ const APInt& BigValue = Big.Low->getValue();
// Check that there is only one bit different.
if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
@@ -2205,13 +2191,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
// Rearrange the case blocks so that the last one falls through if possible.
Case &BackCase = *(CR.Range.second-1);
- if (Size > 1 &&
- NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
- // The last case block won't fall through into 'NextBlock' if we emit the
+ if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) {
+ // The last case block won't fall through into 'NextMBB' if we emit the
// branches in this order. See if rearranging a case value would help.
// We start at the bottom as it's the case with the least weight.
for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
- if (I->BB == NextBlock) {
+ if (I->BB == NextMBB) {
std::swap(*I, BackCase);
break;
}
@@ -2287,8 +2272,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = FrontCase.Low->getValue();
+ const APInt &Last = BackCase.High->getValue();
APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
@@ -2338,8 +2323,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
std::vector<MachineBasicBlock*> DestBBs;
APInt TEI = First;
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
- const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
- const APInt &High = cast<ConstantInt>(I->High)->getValue();
+ const APInt &Low = I->Low->getValue();
+ const APInt &High = I->High->getValue();
if (Low.sle(TEI) && TEI.sle(High)) {
DestBBs.push_back(I->BB);
@@ -2352,26 +2337,19 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
// Calculate weight for each unique destination in CR.
DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
- if (FuncInfo.BPI)
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(I->BB);
- if (Itr != DestWeights.end())
- Itr->second += I->ExtraWeight;
- else
- DestWeights[I->BB] = I->ExtraWeight;
- }
+ if (FuncInfo.BPI) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
+ DestWeights[I->BB] += I->ExtraWeight;
+ }
// Update successor info. Add one edge to each unique successor.
BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
- for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
- E = DestBBs.end(); I != E; ++I) {
- if (!SuccsHandled[(*I)->getNumber()]) {
- SuccsHandled[(*I)->getNumber()] = true;
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(*I);
- addSuccessorWithWeight(JumpTableBB, *I,
- Itr != DestWeights.end() ? Itr->second : 0);
+ for (MachineBasicBlock *DestBB : DestBBs) {
+ if (!SuccsHandled[DestBB->getNumber()]) {
+ SuccsHandled[DestBB->getNumber()] = true;
+ auto I = DestWeights.find(DestBB);
+ addSuccessorWithWeight(JumpTableBB, DestBB,
+ I != DestWeights.end() ? I->second : 0);
}
}
@@ -2403,8 +2381,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = FrontCase.Low->getValue();
+ const APInt &Last = BackCase.High->getValue();
double FMetric = 0;
CaseItr Pivot = CR.Range.first + Size/2;
@@ -2423,8 +2401,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
- const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ const APInt &LEnd = I->High->getValue();
+ const APInt &RBegin = J->Low->getValue();
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
@@ -2479,7 +2457,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- const Constant *C = Pivot->Low;
+ const ConstantInt *C = Pivot->Low;
MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
// We know that we branch to the LHS if the Value being switched on is
@@ -2489,8 +2467,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
// Pivot's Value, then we can branch directly to the LHS's Target,
// rather than creating a leaf node for it.
if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
- cast<ConstantInt>(C)->getValue() ==
- (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ C->getValue() == (CR.GE->getValue() + 1LL)) {
TrueBB = LHSR.first->BB;
} else {
TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2506,8 +2483,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
// is CR.LT - 1, then we can branch directly to the target block for
// the current Case Value, rather than emitting a RHS leaf node for it.
if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
- cast<ConstantInt>(RHSR.first->Low)->getValue() ==
- (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) {
FalseBB = RHSR.first->BB;
} else {
FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2571,8 +2547,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
<< "Total number of comparisons: " << numCmps << '\n');
// Compute span of values.
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt& minValue = FrontCase.Low->getValue();
+ const APInt& maxValue = BackCase.High->getValue();
APInt cmpRange = maxValue - minValue;
DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
@@ -2612,8 +2588,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
count++;
}
- const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
- const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+ const APInt& lowValue = I->Low->getValue();
+ const APInt& highValue = I->High->getValue();
uint64_t lo = (lowValue - lowBound).getZExtValue();
uint64_t hi = (highValue - lowBound).getZExtValue();
@@ -2663,45 +2639,42 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
return true;
}
-/// Clusterify - Transform simple list of Cases into list of CaseRange's
-void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
+void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // Start with "simple" cases.
- for (SwitchInst::ConstCaseIt i : SI.cases()) {
- const BasicBlock *SuccBB = i.getCaseSuccessor();
- MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
-
- uint32_t ExtraWeight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
-
- Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
- SMBB, ExtraWeight));
- }
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
- // Merge case into clusters
- if (Cases.size() >= 2)
- // Must recompute end() each iteration because it may be
- // invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
- J != Cases.end(); ) {
- const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
- const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
- MachineBasicBlock* nextBB = J->BB;
- MachineBasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
- I->High = J->High;
- I->ExtraWeight += J->ExtraWeight;
- J = Cases.erase(J);
- } else {
- I = J++;
- }
+
+ // Extract cases from the switch and sort them.
+ typedef std::pair<const ConstantInt*, unsigned> CasePair;
+ std::vector<CasePair> Sorted;
+ Sorted.reserve(SI->getNumCases());
+ for (auto I : SI->cases())
+ Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex()));
+ std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) {
+ return a.first->getValue().slt(b.first->getValue());
+ });
+
+ // Merge adjacent cases with the same destination, build Cases vector.
+  assert(Cases.empty() && "Cases should be empty before Clusterify");
+ Cases.reserve(SI->getNumCases());
+ MachineBasicBlock *PreviousSucc = nullptr;
+ for (CasePair &CP : Sorted) {
+ const ConstantInt *CaseVal = CP.first;
+ unsigned SuccIndex = CP.second;
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)];
+ uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0;
+
+ if (PreviousSucc == Succ &&
+ (CaseVal->getValue() - Cases.back().High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Cases.back().High = CaseVal;
+ Cases.back().ExtraWeight += Weight;
+ } else {
+ Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight));
}
+ PreviousSucc = Succ;
+ }
+
DEBUG({
size_t numCmps = 0;
for (auto &I : Cases)
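
Worth illustrating what the new Clusterify produces: cases are sorted by value, and a case folds into the previous cluster exactly when it targets the same successor and its value is one above the cluster's High. A sketch over bare (value, successor) pairs; the Cluster type is hypothetical and weights are omitted:

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct Cluster { long Low, High; int Succ; };

    std::vector<Cluster>
    clusterify(std::vector<std::pair<long, int>> Cases) {
      std::sort(Cases.begin(), Cases.end());
      std::vector<Cluster> Out;
      for (auto &C : Cases) {
        if (!Out.empty() && Out.back().Succ == C.second &&
            C.first == Out.back().High + 1)
          Out.back().High = C.first;  // adjacent, same target: grow cluster
        else
          Out.push_back({C.first, C.first, C.second});
      }
      return Out;  // {1->A, 2->A, 4->B} becomes {[1,2]->A, [4,4]->B}
    }
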
@@ -2729,16 +2702,10 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- if (SwitchMBB + 1 != FuncInfo.MF->end())
- NextBlock = SwitchMBB + 1;
-
-
// Create a vector of Cases, sorted so that we can efficiently create a binary
// search tree from them.
CaseVector Cases;
- Clusterify(Cases, SI);
+ Clusterify(Cases, &SI);
// Get the default destination MBB.
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2775,7 +2742,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchMBB->addSuccessor(Default);
// If this is not a fall-through branch, emit the branch.
- if (Default != NextBlock) {
+ if (Default != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(Default)));
}
@@ -3429,30 +3396,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
+ MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
// If this is a constant subscript, handle it quickly.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->isZero()) continue;
- uint64_t Offs =
- DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- SDValue OffsVal;
- EVT PTy = TLI.getPointerTy(AS);
- unsigned PtrBits = PTy.getSizeInBits();
- if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
- DAG.getConstant(Offs, MVT::i64));
- else
- OffsVal = DAG.getConstant(Offs, PTy);
-
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
- OffsVal);
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
+ N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
// N = N + Idx * ElementSize;
- APInt ElementSize =
- APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
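
With the rewrite, a constant GEP index is folded directly in APInt at pointer width: sign-extend or truncate the index to PtrSize bits, multiply by the element size, and modular wrap-around comes for free (no more i64 detour through getSExtValue). A sketch of the same arithmetic:

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using llvm::APInt;

    // Byte offset for a constant GEP index, computed at pointer width.
    APInt gepConstantOffset(unsigned PtrSize, uint64_t ElemBytes,
                            const APInt &Index) {
      APInt ElementSize(PtrSize, ElemBytes);
      // sextOrTrunc mirrors the diff: a wide index is truncated, a narrow
      // one sign-extended; the multiply wraps modulo 2^PtrSize.
      return ElementSize * Index.sextOrTrunc(PtrSize);
    }
    // e.g. PtrSize=64, ElemBytes=8, Index=(i32)-3 -> 2^64 - 24.
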
@@ -3988,6 +3946,93 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
MVT::f32);
}
+static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
+ SelectionDAG &DAG) {
+  // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
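
getLimitedPrecisionExp2 splits x into integer and fractional parts, approximates 2^frac with one of three minimax polynomials, and then injects the integer part directly into the float's exponent field by adding IntegerPartOfX << 23 in the integer domain. A scalar C++ sketch of that exponent-injection trick, using the crudest (6-bit) polynomial from the comments above:

    #include <cstdint>
    #include <cstring>

    float exp2_limited(float x) {
      int32_t ipart = (int32_t)x;          // FP_TO_SINT
      float frac = x - (float)ipart;       // FSUB of SINT_TO_FP(ipart)
      // ~6-bit accurate polynomial for 2^frac.
      float poly =
          0.997535578f + (0.735607626f + 0.252464424f * frac) * frac;
      uint32_t bits;
      std::memcpy(&bits, &poly, sizeof(bits));    // BITCAST f32 -> i32
      bits += (uint32_t)ipart << 23;    // add exponent in the integer domain
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;                              // BITCAST i32 -> f32
    }
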
+
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
@@ -3999,92 +4044,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// final result:
//
// #define LOG2OFe 1.4426950f
- // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ // t0 = Op * LOG2OFe
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFracPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // 0.000107046256 error, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- //
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
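
expandExp now funnels into the shared helper through the identity e^x = 2^(x * log2(e)); the 0x3fb8aa3b constant is log2(e) ~= 1.4426950 as an f32 bit pattern. Assuming the exp2_limited sketch shown after getLimitedPrecisionExp2 above, the reduction is just:

    // e^x == 2^(x * log2(e)); expandPow does the same for pow(10, x)
    // with LOG2OF10 = 3.3219281f (0x40549a78).
    float exp_limited(float x) {
      const float LOG2OFE = 1.4426950f;   // 0x3fb8aa3b
      return exp2_limited(x * LOG2OFE);
    }
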
@@ -4375,91 +4338,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (Op.getValueType() == MVT::f32 &&
- LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
- TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
- }
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
@@ -4483,90 +4363,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
// final result:
//
// #define LOG2OF10 3.3219281f
- // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+  // t0 = RHS * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // twoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
@@ -5114,34 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
- case Intrinsic::x86_avx_vinsertf128_pd_256:
- case Intrinsic::x86_avx_vinsertf128_ps_256:
- case Intrinsic::x86_avx_vinsertf128_si_256:
- case Intrinsic::x86_avx2_vinserti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
- ElVT.getVectorNumElements();
- Res =
- DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
- case Intrinsic::x86_avx_vextractf128_pd_256:
- case Intrinsic::x86_avx_vextractf128_ps_256:
- case Intrinsic::x86_avx_vextractf128_si_256:
- case Intrinsic::x86_avx2_vextracti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
- DestVT.getVectorNumElements();
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -5539,7 +5311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -5618,45 +5390,47 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
- case Intrinsic::frameallocate: {
+ case Intrinsic::frameescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Do the allocation and map it as a normal value.
- // FIXME: Maybe we should add this to the alloca map so that we don't have
- // to register allocate it?
- uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
- int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size);
- MVT PtrVT = TLI.getPointerTy(0);
- SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT);
- setValue(&I, FIVal);
-
- // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is
- // the same on all targets.
- MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
- TII->get(TargetOpcode::FRAME_ALLOC))
- .addSym(FrameAllocSym)
- .addFrameIndex(Alloc);
+ // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ AllocaInst *Slot =
+ cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts());
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(),
+ Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::FRAME_ALLOC))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
return nullptr;
}
case Intrinsic::framerecover: {
- // i8* @llvm.framerecover(i8* %fn, i8* %fp)
+ // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
MVT PtrVT = TLI.getPointerTy(0);
// Get the symbol that defines the frame offset.
- Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(),
+ IdxVal);
// Create a TargetExternalSymbol for the label to avoid any target lowering
// that would make this PC relative.
StringRef Name = FrameAllocSym->getName();
- assert(Name.size() == strlen(Name.data()) && "not null terminated");
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
@@ -5672,6 +5446,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_begincatch:
case Intrinsic::eh_endcatch:
llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+ case Intrinsic::eh_unwindhelp: {
+ AllocaInst *Slot =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only use static allocas with llvm.eh.unwindhelp");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+    // TODO: Save this in the not-yet-existent WinEHFuncInfo struct.
+ (void)FI;
+ return nullptr;
+ }
}
}
@@ -5805,9 +5589,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (const Constant *LoadCst =
- ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
- Builder.DL))
+ if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
+ const_cast<Constant *>(LoadInput), *Builder.DL))
return Builder.getValue(LoadCst);
}
@@ -6748,10 +6531,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Memory output, or 'other' output (e.g. 'X' constraint).
assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
- TLI.getPointerTy()));
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
}
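
Previously a memory operand's flag word recorded only Kind_Mem, and the selector later assumed the 'm' constraint; after this change the target-specific constraint ID travels inside the flag word. A hedged sketch of building such a flag word with the helpers named in this hunk (the exact bit layout stays private to InlineAsm.h):

    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    // Build the INLINEASM operand flag word for one memory operand.
    unsigned memOperandFlags(unsigned ConstraintID) {
      // Kind_Mem with one operand following the flag word...
      unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
      // ...plus the memory constraint ID (e.g. InlineAsm::Constraint_m).
      return InlineAsm::getFlagWordForMem(Flags, ConstraintID);
    }
    // The selector recovers it via InlineAsm::getMemoryConstraintID(Flags).
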
@@ -6855,6 +6643,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
@@ -6894,10 +6683,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI.getPointerTy()));
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
@@ -7901,8 +7695,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- // Check successor nodes' PHI nodes that expect a constant to be available
- // from this block.
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
@@ -7989,3 +7783,10 @@ AddSuccessorMBB(const BasicBlock *BB,
SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
return SuccMBB;
}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I = MBB;
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return I;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ad7411f..30240d8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -137,19 +137,19 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- const Constant *Low;
- const Constant *High;
+ const ConstantInt *Low;
+ const ConstantInt *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
- Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
+ Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
APInt size() const {
- const APInt &rHigh = cast<ConstantInt>(High)->getValue();
- const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ const APInt &rHigh = High->getValue();
+ const APInt &rLow = Low->getValue();
return (rHigh - rLow + 1ULL);
}
};
@@ -173,7 +173,7 @@ private:
/// CaseRec - A struct with ctor used in lowering switches to a binary tree
/// of conditional branches.
struct CaseRec {
- CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
CaseRange r) :
CaseBB(bb), LT(lt), GE(ge), Range(r) {}
@@ -181,8 +181,8 @@ private:
MachineBasicBlock *CaseBB;
/// LT, GE - If nonzero, we know the current case value must be less-than or
/// greater-than-or-equal-to these Constants.
- const Constant *LT;
- const Constant *GE;
+ const ConstantInt *LT;
+ const ConstantInt *GE;
/// Range - A pair of iterators representing the range of case values to be
/// processed at this point in the binary search tree.
CaseRange Range;
@@ -190,24 +190,15 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const Case &C1, const Case &C2) {
- assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
}
};
- void Clusterify(CaseVector &Cases, const SwitchInst &SI);
+ /// Populate Cases with the cases in SI, clustering adjacent cases with the
+ /// same destination together.
+ void Clusterify(CaseVector &Cases, const SwitchInst *SI);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -606,6 +597,10 @@ public:
void visit(unsigned Opcode, const User &I);
+  /// getCopyFromRegs - If there was a virtual register allocated for the
+  /// value V, emit a CopyFromReg of the specified type Ty. Return an empty
+  /// SDValue() otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
@@ -622,8 +617,7 @@ public:
void removeValue(const Value *V) {
// This is to support hack in lowerCallFromStatepoint
// Should be removed when hack is resolved
- if (NodeMap.count(V))
- NodeMap.erase(V);
+ NodeMap.erase(V);
}
void setUnusedArgValue(const Value *V, SDValue NewN) {
@@ -662,7 +656,9 @@ public:
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
// This function is responsible for the whole statepoint lowering process.
- void LowerStatepoint(ImmutableStatepoint Statepoint);
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ MachineBasicBlock *LandingPad = nullptr);
private:
std::pair<SDValue, SDValue> lowerInvokable(
TargetLowering::CallLoweringInfo &CLI,
@@ -830,6 +826,9 @@ private:
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
int64_t Offset, bool IsIndirect,
const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 17eff94..5898da4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5e867cf..4d2af3f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -168,14 +168,13 @@ static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
"instruction selector"));
-static cl::opt<bool>
-EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower an instruction"));
-static cl::opt<bool>
-EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower a formal argument"));
+static cl::opt<int> EnableFastISelAbort(
+    "fast-isel-abort", cl::Hidden,
+    cl::desc("Enable abort calls when \"fast\" instruction selection "
+             "fails to lower an instruction: 0 disables the abort, 1 aborts "
+             "for ordinary instructions but not args, calls, or terminators, "
+             "2 also aborts for argument lowering, and 3 never falls back "
+             "to SelectionDAG."));
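
With the option now an integer, the abort threshold can be raised per run. A plausible invocation (the flag is defined in this hunk; the input file name is hypothetical):

    llc -O0 -fast-isel -fast-isel-abort=2 input.ll -o input.s
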
static cl::opt<bool>
UseMBPI("use-mbpi",
@@ -293,7 +292,8 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
- if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
@@ -416,7 +416,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
- "-fast-isel-abort requires -fast-isel");
+ "-fast-isel-abort > 0 requires -fast-isel");
const Function &Fn = *mf.getFunction();
MF = &mf;
@@ -595,9 +595,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
- // Lower all of the non-terminator instructions. If a call is emitted
- // as a tail call, cease emitting nodes for this block. Terminators
- // are handled below.
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
SDB->visit(*I);
@@ -1182,8 +1181,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
- if (EnableFastISelAbortArgs)
- llvm_unreachable("FastISel didn't lower all arguments");
+ if (EnableFastISelAbort > 1)
+ report_fatal_error("FastISel didn't lower all arguments");
// Use SelectionDAG argument lowering
LowerArguments(Fn);
@@ -1252,6 +1251,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
dbgs() << "FastISel missed call: ";
Inst->dump();
}
+ if (EnableFastISelAbort > 2)
+        // The FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
@@ -1279,24 +1282,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
- if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
- // Don't abort, and use a different message for terminator misses.
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ bool ShouldAbort = EnableFastISelAbort;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
dbgs() << "FastISel missed terminator: ";
- Inst->dump();
- }
- } else {
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+          // Don't abort for terminators unless the abort level is really high.
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
dbgs() << "FastISel miss: ";
- Inst->dump();
}
- if (EnableFastISelAbort)
- // The "fast" selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- llvm_unreachable("FastISel didn't select the entire block");
+ Inst->dump();
}
+ if (ShouldAbort)
+        // The FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ NumFastIselFailures += NumFastIselRemaining;
break;
}
@@ -1775,9 +1778,23 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
} else {
assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
"Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ if (SelectInlineAsmMemoryOperand(InOps[i+1],
+ InlineAsm::getMemoryConstraintID(Flags),
+ SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
@@ -1933,7 +1950,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- EVT VTs[] = { MVT::Other, MVT::Glue };
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
New->setNodeId(-1);
return New.getNode();
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1271f6b..3cc7a98 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -224,6 +224,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+ MachineBasicBlock *LandingPad,
SelectionDAGBuilder &Builder) {
ImmutableCallSite CS(StatepointSite.getCallSite());
@@ -245,15 +246,29 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
Tmp->setTailCall(CS.isTailCall());
Tmp->setCallingConv(CS.getCallingConv());
Tmp->setAttributes(CS.getAttributes());
- Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+ Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false, LandingPad);
// Handle the return value of the call iff any.
const bool HasDef = !Tmp->getType()->isVoidTy();
if (HasDef) {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
- // this value.
- Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ if (CS.isInvoke()) {
+      // For invokes the result value will be used in a different basic block,
+      // so we need to export it now. But the statepoint call has a different
+      // type than the actual call, which means the standard exporting
+      // mechanism would create a register of the wrong type. So instead we
+      // create a register of the correct type and save the value into it
+      // manually.
+      // TODO: To eliminate this problem we could remove the gc.result
+      // intrinsics completely and make the statepoint call return a tuple.
+ unsigned reg = Builder.FuncInfo.CreateRegs(Tmp->getType());
+ Builder.CopyValueToVirtualRegister(Tmp, reg);
+ Builder.FuncInfo.ValueMap[CS.getInstruction()] = reg;
+    } else {
+      // The value of the statepoint itself will be the value of the call itself.
+      // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ }
} else {
// The token value is never used from here on, just generate a poison value
Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
@@ -267,6 +282,15 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
// Search for the call node
// The following code is essentially reverse engineering X86's
// LowerCallTo.
+ // We are expecting DAG to have the following form:
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // ch = eh_label ch (only in case of invoke statepoint)
+ //
+  // The DAG root will be either the last eh_label or the callseq_end.
+
SDNode *CallNode = nullptr;
// We just emitted a call, so it should be last thing generated
@@ -276,8 +300,11 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
SDNode *CallEnd = Chain.getNode();
int Sanity = 0;
while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
- CallEnd = CallEnd->getGluedNode();
- assert(CallEnd && "Can not find call node");
+ assert(CallEnd->getNumOperands() >= 1 &&
+ CallEnd->getOperand(0).getValueType() == MVT::Other);
+
+ CallEnd = CallEnd->getOperand(0).getNode();
+
assert(Sanity < 20 && "should have found call end already");
Sanity++;
}
@@ -506,7 +533,9 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
LowerStatepoint(ImmutableStatepoint(&CI));
}
-void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ MachineBasicBlock *LandingPad/*=nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -542,13 +571,12 @@ void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
}
#endif
-
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredArgs;
lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
// Get call node, we will replace it later with statepoint
- SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+ SDNode *CallNode = lowerCallFromStatepoint(ISP, LandingPad, *this);
// Construct the actual STATEPOINT node with all the appropriate arguments
// and return values.
@@ -634,7 +662,24 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
assert(isStatepoint(I) &&
"first argument must be a statepoint token");
- setValue(&CI, getValue(I));
+ if (isa<InvokeInst>(I)) {
+ // For invokes we should have stored the call result in a virtual
+ // register. We cannot use the default getValue() functionality to copy
+ // the value from this register because the statepoint and the actual
+ // call can have different return types, so getValue() would emit a
+ // CopyFromReg of the wrong type, which is always i32 in our case.
+ PointerType *CalleeType = cast<PointerType>(
+ ImmutableStatepoint(I).actualCallee()->getType());
+ Type *RetTy = cast<FunctionType>(
+ CalleeType->getElementType())->getReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+ }
+ else {
+ setValue(&CI, getValue(I));
+ }
}
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0a3c926..ddbf0b2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -96,18 +96,21 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ .setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
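
The change above swaps the blanket isSigned flag for a per-type shouldSignExtendTypeInLibCall query. A standalone sketch of that classification pattern, with a hypothetical predicate standing in for the target hook:

    struct ExtFlags { bool IsSExt; bool IsZExt; };

    // Hypothetical stand-in for the target's shouldSignExtendTypeInLibCall
    // hook; a real target might, for example, force zero extension of
    // boolean-like types even in otherwise signed libcalls.
    static bool shouldSignExtendToy(unsigned TypeBits, bool IsSigned) {
      return IsSigned && TypeBits > 1;
    }

    // Each argument (and the return value) is classified independently,
    // mirroring the per-operand Entry.isSExt/isZExt assignments above.
    static ExtFlags classify(unsigned TypeBits, bool IsSigned) {
      bool SExt = shouldSignExtendToy(TypeBits, IsSigned);
      return {SExt, !SExt};
    }
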
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index f6393a5..66a6a3c 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -53,10 +53,10 @@ private:
Type *GetConcreteStackEntryType(Function &F);
void CollectRoots(Function &F);
static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
- Value *BasePtr, int Idx1,
+ Type *Ty, Value *BasePtr, int Idx1,
const char *Name);
static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
- Value *BasePtr, int Idx1, int Idx2,
+ Type *Ty, Value *BasePtr, int Idx1, int Idx2,
const char *Name);
};
}
@@ -343,13 +343,14 @@ void ShadowStackGCLowering::CollectRoots(Function &F) {
}
GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx, int Idx2,
- const char *Name) {
+ IRBuilder<> &B, Type *Ty,
+ Value *BasePtr, int Idx,
+ int Idx2,
+ const char *Name) {
Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx),
ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -357,11 +358,11 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
}
GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
+ IRBuilder<> &B, Type *Ty, Value *BasePtr,
int Idx, const char *Name) {
Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx)};
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -402,14 +403,15 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Initialize the map pointer and load the current head of the shadow stack.
Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
- Instruction *EntryMapPtr =
- CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 1, "gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
// After all the allocas...
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
// For each root, find the corresponding slot in the aggregate...
- Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+ Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 1 + I, "gc_root");
// And use it in lieu of the alloca.
AllocaInst *OriginalAlloca = Roots[I].second;
@@ -426,10 +428,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
AtEntry.SetInsertPoint(IP->getParent(), IP);
// Push the entry onto the shadow stack.
- Instruction *EntryNextPtr =
- CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next");
- Instruction *NewHeadVal =
- CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead");
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, "gc_newhead");
AtEntry.CreateStore(CurrentHead, EntryNextPtr);
AtEntry.CreateStore(NewHeadVal, Head);
@@ -439,7 +441,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
Instruction *EntryNextPtr2 =
- CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next");
+ CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
+ "gc_frame.next");
Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
}
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 35e4292..2335a88 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -128,7 +128,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
Value *Idxs[2] = { Zero, One };
- Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+ Value *CallSite =
+ Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
// Insert a store of the call-site number
ConstantInt *CallSiteNoC =
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index d46621d..025ae70 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() {
void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
// Number indexes with half the default spacing so we can catch up quickly.
const unsigned Space = SlotIndex::InstrDist/2;
- assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
IndexList::iterator startItr = std::prev(curItr);
unsigned index = startItr->getIndex();
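
Since Space derives from a compile-time constant, the runtime assert can indeed become a static_assert, moving the check to compile time. A self-contained illustration (the InstrDist value here is hypothetical):

    constexpr unsigned InstrDist = 16; // hypothetical constant
    constexpr unsigned Space = InstrDist / 2;
    static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
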
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index faf94b6..7572803 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -364,7 +364,7 @@ void StackColoring::calculateLocalLiveness() {
}
}
- BBSet = NextBBSet;
+ BBSet = std::move(NextBBSet);
}// while changed.
}
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 767f43a..d88be57 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "stackmaps"
-namespace llvm {
-cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::init(true),
- cl::desc("Enable PatchPoint Liveness Analysis Pass"));
-}
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
@@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited");
STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
STATISTIC(NumStackMaps, "Number of StackMaps visited");
+namespace {
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic to function.
+class StackMapLiveness : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// \brief Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// \brief Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// \brief Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Performs the actual liveness calculation for the function.
+ bool calculateLiveness();
+
+ /// \brief Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineInstr &MI);
+
+ /// \brief Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask() const;
+};
+} // namespace
+
char StackMapLiveness::ID = 0;
char &llvm::StackMapLivenessID = StackMapLiveness::ID;
INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
@@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Calculate the liveness information for the given machine function.
-bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
if (!EnablePatchPointLiveness)
return false;
- DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
- << _MF.getName() << " **********\n");
- MF = &_MF;
- TRI = MF->getSubtarget().getRegisterInfo();
+ DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
+ << " **********\n");
+ this->MF = &MF;
+ TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
- if (!MF->getFrameInfo()->hasPatchPoint()) {
+ if (!MF.getFrameInfo()->hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 5d46419..aa18dea 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -19,8 +19,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -76,10 +74,21 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
llvm_unreachable("Unsupported stackmap version!");
}
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+ int RegNo = TRI->getDwarfRegNum(Reg, false);
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
+ RegNo = TRI->getDwarfRegNum(*SR, false);
+
+ assert(RegNo >= 0 && "Invalid Dwarf register number.");
+ return (unsigned) RegNo;
+}
+
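
A standalone sketch of the super-register climb performed by the helper above; here the super-register chain is modeled as a precomputed list per register, whereas LLVM's MCSuperRegIterator enumerates super-registers on the fly:

    #include <cassert>
    #include <map>
    #include <vector>

    struct ToyRegInfo {
      std::map<unsigned, int> Dwarf;                    // -1 == no mapping
      std::map<unsigned, std::vector<unsigned>> Supers; // super-reg chains
      int dwarfNum(unsigned R) const {
        auto It = Dwarf.find(R);
        return It == Dwarf.end() ? -1 : It->second;
      }
      std::vector<unsigned> superRegs(unsigned R) const {
        auto It = Supers.find(R);
        return It == Supers.end() ? std::vector<unsigned>() : It->second;
      }
    };

    // Climb to super-registers until one has a valid DWARF number.
    unsigned toyGetDwarfRegNum(unsigned Reg, const ToyRegInfo &TRI) {
      int RegNo = TRI.dwarfNum(Reg);
      for (unsigned SR : TRI.superRegs(Reg)) {
        if (RegNo >= 0)
          break;
        RegNo = TRI.dwarfNum(SR);
      }
      assert(RegNo >= 0 && "Invalid Dwarf register number.");
      return (unsigned)RegNo;
    }
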
MachineInstr::const_mop_iterator
StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
LocationVec &Locs, LiveOutVec &LiveOuts) const {
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
if (MOI->isImm()) {
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
@@ -89,7 +98,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
Size /= 8;
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::IndirectMemRefOp: {
@@ -97,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(Size > 0 && "Need a valid size for indirect memory locations.");
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::ConstantOp: {
@@ -122,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
- const TargetRegisterClass *RC =
- AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass(
- MOI->getReg());
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
+
+ unsigned Offset = 0;
+ unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg());
+ if (SubRegIdx)
+ Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+
Locs.push_back(
- Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
+ Location(Location::Register, RC->getSize(), RegNo, Offset));
return ++MOI;
}
@@ -137,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
return ++MOI;
}
-/// Go up the super-register chain until we hit a valid dwarf register number.
-static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNo = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
- RegNo = TRI->getDwarfRegNum(*SR, false);
+void StackMaps::print(raw_ostream &OS) {
+ const TargetRegisterInfo *TRI =
+ AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr;
+ OS << WSMP << "callsites:\n";
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
- assert(RegNo >= 0 && "Invalid Dwarf register number.");
- return (unsigned) RegNo;
+ OS << WSMP << "callsite " << CSI.ID << "\n";
+ OS << WSMP << " has " << CSLocs.size() << " locations\n";
+
+ unsigned OperIdx = 0;
+ for (const auto &Loc : CSLocs) {
+ OS << WSMP << " Loc " << OperIdx << ": ";
+ switch (Loc.LocType) {
+ case Location::Unprocessed:
+ OS << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ OS << "Register ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ break;
+ case Location::Direct:
+ OS << "Direct ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ if (Loc.Offset)
+ OS << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ OS << "Indirect ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ OS << "+" << Loc.Offset;
+ break;
+ case Location::Constant:
+ OS << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ OS << "Constant Index " << Loc.Offset;
+ break;
+ }
+ OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size
+ << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+ OperIdx++;
+ }
+
+ OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n";
+
+ OperIdx = 0;
+ for (const auto &LO : LiveOuts) {
+ OS << WSMP << " LO " << OperIdx << ": ";
+ if (TRI)
+ OS << TRI->getName(LO.Reg);
+ else
+ OS << LO.Reg;
+ OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte "
+ << LO.Size << "]\n";
+ OperIdx++;
+ }
+ }
}
/// Create a live-out register record for the given register Reg.
@@ -160,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
StackMaps::LiveOutVec
StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
assert(Mask && "No register mask specified");
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
LiveOutVec LiveOuts;
// Create a LiveOutReg for each bit that is set in the register mask.
@@ -383,16 +460,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
/// 0x3, Indirect, [Reg + Offset] (spilled value)
/// 0x4, Constant, Offset (small constant)
/// 0x5, ConstIndex, Constants[Offset] (large constant)
-void StackMaps::emitCallsiteEntries(MCStreamer &OS,
- const TargetRegisterInfo *TRI) {
+void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
+ DEBUG(print(dbgs()));
// Callsite entries.
- DEBUG(dbgs() << WSMP << "callsites:\n");
for (const auto &CSI : CSInfos) {
const LocationVec &CSLocs = CSI.Locations;
const LiveOutVec &LiveOuts = CSI.LiveOuts;
- DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n");
-
// Verify stack map entry. It's better to communicate a problem to the
// runtime than crash in case of in-process compilation. Currently, we do
// simple overflow checks, but we may eventually communicate other
@@ -413,83 +487,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS,
// Reserved for flags.
OS.EmitIntValue(0, 2);
-
- DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n");
-
OS.EmitIntValue(CSLocs.size(), 2);
- unsigned OperIdx = 0;
for (const auto &Loc : CSLocs) {
- unsigned RegNo = 0;
- int Offset = Loc.Offset;
- if(Loc.Reg) {
- RegNo = getDwarfRegNum(Loc.Reg, TRI);
-
- // If this is a register location, put the subregister byte offset in
- // the location offset.
- if (Loc.LocType == Location::Register) {
- assert(!Loc.Offset && "Register location should have zero offset");
- unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
- unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg);
- if (SubRegIdx)
- Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- }
- }
- else {
- assert(Loc.LocType != Location::Register &&
- "Missing location register");
- }
-
- DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": ";
- switch (Loc.LocType) {
- case Location::Unprocessed:
- dbgs() << "<Unprocessed operand>";
- break;
- case Location::Register:
- dbgs() << "Register " << TRI->getName(Loc.Reg);
- break;
- case Location::Direct:
- dbgs() << "Direct " << TRI->getName(Loc.Reg);
- if (Loc.Offset)
- dbgs() << " + " << Loc.Offset;
- break;
- case Location::Indirect:
- dbgs() << "Indirect " << TRI->getName(Loc.Reg)
- << " + " << Loc.Offset;
- break;
- case Location::Constant:
- dbgs() << "Constant " << Loc.Offset;
- break;
- case Location::ConstantIndex:
- dbgs() << "Constant Index " << Loc.Offset;
- break;
- }
- dbgs() << " [encoding: .byte " << Loc.LocType
- << ", .byte " << Loc.Size
- << ", .short " << RegNo
- << ", .int " << Offset << "]\n";
- );
-
OS.EmitIntValue(Loc.LocType, 1);
OS.EmitIntValue(Loc.Size, 1);
- OS.EmitIntValue(RegNo, 2);
- OS.EmitIntValue(Offset, 4);
- OperIdx++;
+ OS.EmitIntValue(Loc.Reg, 2);
+ OS.EmitIntValue(Loc.Offset, 4);
}
- DEBUG(dbgs() << WSMP << " has " << LiveOuts.size()
- << " live-out registers\n");
-
// Num live-out registers and padding to align to 4 byte.
OS.EmitIntValue(0, 2);
OS.EmitIntValue(LiveOuts.size(), 2);
- OperIdx = 0;
for (const auto &LO : LiveOuts) {
- DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": "
- << TRI->getName(LO.Reg)
- << " [encoding: .short " << LO.RegNo
- << ", .byte 0, .byte " << LO.Size << "]\n");
OS.EmitIntValue(LO.RegNo, 2);
OS.EmitIntValue(0, 1);
OS.EmitIntValue(LO.Size, 1);
@@ -512,7 +523,6 @@ void StackMaps::serializeToStackMapSection() {
MCContext &OutContext = AP.OutStreamer.getContext();
MCStreamer &OS = AP.OutStreamer;
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
// Create the section.
const MCSection *StackMapSection =
@@ -527,7 +537,7 @@ void StackMaps::serializeToStackMapSection() {
emitStackmapHeader(OS);
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
- emitCallsiteEntries(OS, TRI);
+ emitCallsiteEntries(OS);
OS.AddBlankLine();
// Clean up.
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index cc72e5e..a5a175f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() {
UsedColors.resize(LastFI);
Assignments.resize(LastFI);
+ typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+ SmallVector<Pair *, 16> Intervals;
+ Intervals.reserve(LS->getNumIntervals());
+ for (auto &I : *LS)
+ Intervals.push_back(&I);
+ std::sort(Intervals.begin(), Intervals.end(),
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+
// Gather all spill slots into a list.
DEBUG(dbgs() << "Spill slot intervals:\n");
- for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
- LiveInterval &li = i->second;
+ for (auto *I : Intervals) {
+ LiveInterval &li = I->second;
DEBUG(li.dump());
int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
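
The hunk above collects pointers to the spill-slot map entries and sorts them by key before iterating, presumably so the walk no longer depends on the container's internal ordering. The same pattern, as a standalone template:

    #include <algorithm>
    #include <vector>

    // Collect pointers to the entries of an associative container and
    // sort them by key, so iteration no longer depends on bucket order.
    template <typename Map>
    std::vector<typename Map::value_type *> sortedEntries(Map &M) {
      std::vector<typename Map::value_type *> Out;
      Out.reserve(M.size());
      for (auto &E : M)
        Out.push_back(&E);
      std::sort(Out.begin(), Out.end(),
                [](auto *L, auto *R) { return L->first < R->first; });
      return Out;
    }
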
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 2566c1f..38725b5 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -285,21 +285,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
unsigned SubIdx, unsigned &Size,
unsigned &Offset,
- const TargetMachine *TM) const {
+ const MachineFunction &MF) const {
if (!SubIdx) {
Size = RC->getSize();
Offset = 0;
return true;
}
- unsigned BitSize =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
if (BitSize % 8)
return false;
- int BitOffset =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
if (BitOffset < 0 || BitOffset % 8)
return false;
@@ -308,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getDataLayout()->isLittleEndian()) {
+ if (!MF.getTarget().getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -377,16 +376,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-bool TargetInstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
}
-static MachineInstr* foldPatchpoint(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex,
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
switch (MI->getOpcode()) {
@@ -405,9 +401,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
- for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
- I != E; ++I) {
- if (*I < StartIdx)
+ for (unsigned Op : Ops) {
+ if (Op < StartIdx)
return nullptr;
}
@@ -427,8 +422,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Compute the spill slot size and offset.
const TargetRegisterClass *RC =
MF.getRegInfo().getRegClass(MO.getReg());
- bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
- SpillOffset, &MF.getTarget());
+ bool Valid =
+ TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
if (!Valid)
report_fatal_error("cannot spill patchpoint subregister operand");
MIB.addImm(StackMaps::IndirectMemRefOp);
@@ -448,10 +443,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
/// operand folded, otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
unsigned Flags = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (MI->getOperand(Ops[i]).isDef())
@@ -517,10 +511,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 9048a44..58a6d52 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -664,6 +664,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) {
+#define OP_TO_LIBCALL(Name, Enum) \
+ case Name: \
+ switch (VT.SimpleTy) { \
+ default: \
+ return UNKNOWN_LIBCALL; \
+ case MVT::i8: \
+ return Enum##_1; \
+ case MVT::i16: \
+ return Enum##_2; \
+ case MVT::i32: \
+ return Enum##_4; \
+ case MVT::i64: \
+ return Enum##_8; \
+ case MVT::i128: \
+ return Enum##_16; \
+ }
+
+ switch (Opc) {
+ OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
+ OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
+ }
+
+#undef OP_TO_LIBCALL
+
+ return UNKNOWN_LIBCALL;
+}
+
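
To see what OP_TO_LIBCALL expands to, here is a stripped-down standalone version of the same dispatch, with toy enum and opcode names standing in for the RTLIB and ISD ones:

    enum Libcall {
      SYNC_FETCH_AND_ADD_1, SYNC_FETCH_AND_ADD_2, SYNC_FETCH_AND_ADD_4,
      SYNC_FETCH_AND_ADD_8, SYNC_FETCH_AND_ADD_16, UNKNOWN_LIBCALL
    };
    enum Opcode { ATOMIC_LOAD_ADD };

    Libcall toyGetAtomic(Opcode Opc, unsigned Bits) {
    #define OP_TO_LIBCALL(Name, Enum)                                      \
      case Name:                                                           \
        switch (Bits) {                                                    \
        default:  return UNKNOWN_LIBCALL;                                  \
        case 8:   return Enum##_1;                                         \
        case 16:  return Enum##_2;                                         \
        case 32:  return Enum##_4;                                         \
        case 64:  return Enum##_8;                                         \
        case 128: return Enum##_16;                                        \
        }
      switch (Opc) {
        OP_TO_LIBCALL(ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
      }
    #undef OP_TO_LIBCALL
      return UNKNOWN_LIBCALL;
    }

    // toyGetAtomic(ATOMIC_LOAD_ADD, 32) yields SYNC_FETCH_AND_ADD_4, i.e.
    // the size-suffixed __sync_fetch_and_add_4 runtime routine.
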
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
@@ -695,12 +733,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
}
/// NOTE: The TargetMachine owns TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
- : TM(tm), DL(TM.getDataLayout()) {
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- IsLittleEndian = DL->isLittleEndian();
+ IsLittleEndian = getDataLayout()->isLittleEndian();
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
@@ -792,58 +829,21 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Expand);
- setOperationAction(ISD::FEXP , MVT::f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::f16, Expand);
- setOperationAction(ISD::FRINT, MVT::f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
- setOperationAction(ISD::FROUND, MVT::f16, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f32, Expand);
- setOperationAction(ISD::FRINT, MVT::f32, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
- setOperationAction(ISD::FROUND, MVT::f32, Expand);
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
- setOperationAction(ISD::FROUND, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f128, Expand);
- setOperationAction(ISD::FLOG2, MVT::f128, Expand);
- setOperationAction(ISD::FLOG10, MVT::f128, Expand);
- setOperationAction(ISD::FEXP , MVT::f128, Expand);
- setOperationAction(ISD::FEXP2, MVT::f128, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f128, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f128, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
- setOperationAction(ISD::FCEIL, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
- setOperationAction(ISD::FROUND, MVT::f128, Expand);
+ for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FLOG , VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP , VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FROUND, VT, Expand);
+ }
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -859,7 +859,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
}
unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return DL->getPointerSizeInBits(AS);
+ return getDataLayout()->getPointerSizeInBits(AS);
}
unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -868,7 +868,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
}
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*DL->getPointerSize(0));
+ return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -1144,6 +1144,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
+// This function is in TargetLowering because it uses RegClassForVT which would
+// need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering having a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
@@ -1498,7 +1502,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
- return DL->getABITypeAlignment(Ty);
+ return getDataLayout()->getABITypeAlignment(Ty);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index c1b34f7..bcf2aa7 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -244,22 +245,9 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-const MCSection *TargetLoweringObjectFileELF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
- unsigned Flags = getELFSectionFlags(Kind);
-
- // If we have -ffunction-section or -fdata-section then we should emit the
- // global value to a uniqued section specifically for it.
- bool EmitUniqueSection = false;
- if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
- if (Kind.isText())
- EmitUniqueSection = TM.getFunctionSections();
- else
- EmitUniqueSection = TM.getDataSections();
- }
- EmitUniqueSection |= GV->hasComdat();
-
+static const MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -309,9 +297,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Name.push_back('.');
TM.getNameWithPrefix(Name, GV, Mang, true);
}
- return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group,
- EmitUniqueSection && !UniqueSectionNames);
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group,
+ EmitUniqueSection && !UniqueSectionNames);
+}
+
+const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
+
+ return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ EmitUniqueSection, Flags);
}
const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -323,7 +331,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
if (!EmitUniqueSection)
return ReadOnlySection;
- return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM);
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -423,6 +432,11 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
/// option string. Returns StringRef() if the option does not specify a library.
StringRef TargetLoweringObjectFileMachO::
@@ -697,6 +711,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
return SSym;
}
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit do, we replace the GOT equivalent by accessing the final symbol
+ // through a non_lazy_ptr stub instead. One advantage is that it allows the
+ // computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+ // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += DL->getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.GetOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!Offset)
+ return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -853,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection);
+ } else {
+ SmallString<256> TmpData;
+ getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection);
}
}
@@ -874,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return DataSection;
}
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
StringRef TargetLoweringObjectFileCOFF::
getDepLibFromLinkerOpt(StringRef LinkerOption) const {
const char *LibCmd = "/DEFAULTLIB:";
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1bbe6e1..57daeab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
+/// getSingleDef -- return the MachineInstr* if it is the single def of Reg
+/// in the current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// MaxLen specifies the maximum length of the copy chain the function
+/// may walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
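
A standalone sketch of the copy-chain walk above, with the single-def lookup replaced by a toy map from register to its defining copy (hypothetical types, not the MachineInstr API):

    #include <map>

    struct ToyDef {
      bool IsCopy;  // models MachineInstr::isCopy()
      unsigned Src; // models getOperand(1).getReg()
    };

    // Follow single defs backwards through copies from FromReg, returning
    // true if ToReg is reached within MaxLen steps.
    bool toyIsRevCopyChain(unsigned FromReg, unsigned ToReg, int MaxLen,
                           const std::map<unsigned, ToyDef> &Defs) {
      unsigned TmpReg = FromReg;
      for (int i = 0; i < MaxLen; ++i) {
        auto It = Defs.find(TmpReg);
        if (It == Defs.end() || !It->second.IsCopy)
          return false;
        TmpReg = It->second.Src;
        if (TmpReg == ToReg)
          return true;
      }
      return false;
    }

For the comment's example, Defs would map %FromReg to a copy of %Tmp1 and %Tmp1 to a copy of %Tmp2; the walk reaches %ToReg only if %Tmp2 was in turn copied from it.
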
/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+ // FIXME:
+ // We can extend the logic further: If a pair of operands in an insn has
+ // been merged, the insn could be regarded as a virtual copy, and the virtual
+ // copy could also be used to construct a copy chain.
+ // To minimize register copies more generally, the logic of the two-address
+ // instruction pass should ideally be integrated with the register
+ // allocation pass, where the interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 7d3b0ce..d9adfdf 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -286,7 +286,7 @@ void VirtRegRewriter::addMBBLiveIns() {
}
void VirtRegRewriter::rewrite() {
- bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6f712a9..ab0f96e 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/Function.h"
@@ -25,6 +27,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <memory>
@@ -36,25 +42,31 @@ using namespace llvm::PatternMatch;
namespace {
-struct HandlerAllocas {
- TinyPtrVector<AllocaInst *> Allocas;
- int ParentFrameAllocationIndex;
-};
-
// This map is used to model frame variable usage during outlining, to
// construct a structure type to hold the frame variables in a frame
// allocation block, and to remap the frame variable allocas (including
// spill locations as needed) to GEPs that get the variable from the
// frame allocation structure.
-typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-class WinEHPrepare : public FunctionPass {
- std::unique_ptr<FunctionPass> DwarfPrepare;
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+
+enum ActionType { Catch, Cleanup };
+
+class LandingPadActions;
+class ActionHandler;
+class CatchHandler;
+class CleanupHandler;
+class LandingPadMap;
+typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
+typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+
+class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
WinEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {}
+ : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -67,11 +79,24 @@ public:
}
private:
- bool prepareCPPEHHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
- LandingPadInst *LPad, CallInst *&EHAlloc,
- AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo);
+ bool prepareExceptionHandlers(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads);
+ bool outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo);
+
+ void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
+ CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks);
+ CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB);
+
+ void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
+
+ // All fields are reset by runOnFunction.
+ EHPersonality Personality;
+ CatchHandlerMapTy CatchHandlerMap;
+ CleanupHandlerMapTy CleanupHandlerMap;
+ DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
};
class WinEHFrameVariableMaterializer : public ValueMaterializer {
@@ -87,34 +112,218 @@ private:
IRBuilder<> Builder;
};
-class WinEHCatchDirector : public CloningDirector {
+class LandingPadMap {
+public:
+ LandingPadMap() : OriginLPad(nullptr) {}
+ void mapLandingPad(const LandingPadInst *LPad);
+
+ bool isInitialized() { return OriginLPad != nullptr; }
+
+ bool mapIfEHPtrLoad(const LoadInst *Load) {
+ return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs);
+ }
+ bool mapIfSelectorLoad(const LoadInst *Load) {
+ return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs);
+ }
+
+ bool isLandingPadSpecificInst(const Instruction *Inst) const;
+
+ void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const;
+
+private:
+ bool mapIfEHLoad(const LoadInst *Load,
+ SmallVectorImpl<const StoreInst *> &Stores,
+ SmallVectorImpl<const Value *> &StoreAddrs);
+
+ const LandingPadInst *OriginLPad;
+ // We will normally only see one of each of these instructions, but
+ // if more than one occurs for some reason we can handle that.
+ TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
+ TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
+
+ // In optimized code, there will typically be at most one instance of
+ // each of the following, but in unoptimized IR it is not uncommon
+ // for the values to be stored, loaded and then stored again. In that
+ // case we will create a second entry for each store and store address.
+ SmallVector<const StoreInst *, 2> EHPtrStores;
+ SmallVector<const StoreInst *, 2> SelectorStores;
+ SmallVector<const Value *, 2> EHPtrStoreAddrs;
+ SmallVector<const Value *, 2> SelectorStoreAddrs;
+};
+
+class WinEHCloningDirectorBase : public CloningDirector {
public:
- WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector,
- Value *EHObj, FrameVarInfoMap &VarInfo)
- : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj),
- Materializer(CatchFn, VarInfo),
- SelectorIDType(Type::getInt32Ty(LPI->getContext())),
- Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {}
+ WinEHCloningDirectorBase(Function *HandlerFn,
+ FrameVarInfoMap &VarInfo,
+ LandingPadMap &LPadMap)
+ : Materializer(HandlerFn, VarInfo),
+ SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
+ Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())),
+ LPadMap(LPadMap) {}
CloningAction handleInstruction(ValueToValueMapTy &VMap,
const Instruction *Inst,
BasicBlock *NewBB) override;
+ virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume,
+ BasicBlock *NewBB) = 0;
+
ValueMaterializer *getValueMaterializer() override { return &Materializer; }
-private:
- LandingPadInst *LPI;
- Value *CurrentSelector;
- Value *EHObj;
+protected:
WinEHFrameVariableMaterializer Materializer;
Type *SelectorIDType;
Type *Int8PtrType;
+ LandingPadMap &LPadMap;
+};
+
+class WinEHCatchDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCatchDirector(Function *CatchFn, Value *Selector,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap),
+ CurrentSelector(Selector->stripPointerCasts()),
+ ExceptionObjectVar(nullptr) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+
+ const Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+private:
+ Value *CurrentSelector;
- const Value *ExtractedEHPtr;
- const Value *ExtractedSelector;
- const Value *EHPtrStoreAddr;
- const Value *SelectorStoreAddr;
+ const Value *ExceptionObjectVar;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
};
+
+class WinEHCleanupDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCleanupDirector(Function *CleanupFn,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+};
+
+class ActionHandler {
+public:
+ ActionHandler(BasicBlock *BB, ActionType Type)
+ : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {}
+
+ ActionType getType() const { return Type; }
+ BasicBlock *getStartBlock() const { return StartBB; }
+
+ bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
+
+ void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
+ Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
+
+private:
+ BasicBlock *StartBB;
+ ActionType Type;
+
+ // Can be either a BlockAddress or a Function depending on the EH personality.
+ Constant *HandlerBlockOrFunc;
+};
+
+class CatchHandler : public ActionHandler {
+public:
+ CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
+ : ActionHandler(BB, ActionType::Catch), Selector(Selector),
+ NextBB(NextBB), ExceptionObjectVar(nullptr) {}
+
+ // Method for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Catch;
+ }
+
+ Constant *getSelector() const { return Selector; }
+ BasicBlock *getNextBB() const { return NextBB; }
+
+ const Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+ void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
+ void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
+ ReturnTargets = Targets;
+ }
+
+private:
+ Constant *Selector;
+ BasicBlock *NextBB;
+ const Value *ExceptionObjectVar;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
+};
+
+class CleanupHandler : public ActionHandler {
+public:
+ CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
+
+  // Method to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Cleanup;
+ }
+};
+
+class LandingPadActions {
+public:
+ LandingPadActions() : HasCleanupHandlers(false) {}
+
+ void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); }
+ void insertCleanupHandler(CleanupHandler *Action) {
+ Actions.push_back(Action);
+ HasCleanupHandlers = true;
+ }
+
+ bool includesCleanup() const { return HasCleanupHandlers; }
+
+ SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
+ SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
+
+private:
+ // Note that this class does not own the ActionHandler objects in this vector.
+ // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap
+ // in the WinEHPrepare class.
+ SmallVector<ActionHandler *, 4> Actions;
+ bool HasCleanupHandlers;
+};
+
} // end anonymous namespace
char WinEHPrepare::ID = 0;
@@ -125,10 +334,10 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
return new WinEHPrepare(TM);
}
-static bool isMSVCPersonality(EHPersonality Pers) {
- return Pers == EHPersonality::MSVC_Win64SEH ||
- Pers == EHPersonality::MSVC_CXX;
-}
+// FIXME: Remove this once the backend can handle the prepared IR.
+static cl::opt<bool>
+SEHPrepare("sehprepare", cl::Hidden,
+ cl::desc("Prepare functions with SEH personalities"));
bool WinEHPrepare::runOnFunction(Function &Fn) {
SmallVector<LandingPadInst *, 4> LPads;
@@ -145,60 +354,67 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
return false;
// Classify the personality to see what kind of preparation we need.
- EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn());
-
- // Delegate through to the DWARF pass if this is unrecognized.
- if (!isMSVCPersonality(Pers))
- return DwarfPrepare->runOnFunction(Fn);
+ Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
- // FIXME: This only returns true if the C++ EH handlers were outlined.
- // When that code is complete, it should always return whatever
- // prepareCPPEHHandlers returns.
- if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads))
- return true;
-
- // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable.
- if (Resumes.empty())
+ // Do nothing if this is not an MSVC personality.
+ if (!isMSVCEHPersonality(Personality))
return false;
- for (ResumeInst *Resume : Resumes) {
- IRBuilder<>(Resume).CreateUnreachable();
- Resume->eraseFromParent();
+ if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) {
+ // Replace all resume instructions with unreachable.
+ // FIXME: Remove this once the backend can handle the prepared IR.
+ for (ResumeInst *Resume : Resumes) {
+ IRBuilder<>(Resume).CreateUnreachable();
+ Resume->eraseFromParent();
+ }
+ return true;
}
+ // If there were any landing pads, prepareExceptionHandlers will make changes.
+ prepareExceptionHandlers(Fn, LPads);
return true;
}
bool WinEHPrepare::doFinalization(Module &M) {
- return DwarfPrepare->doFinalization(M);
+ return false;
}
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- DwarfPrepare->getAnalysisUsage(AU);
-}
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
-bool WinEHPrepare::prepareCPPEHHandlers(
+bool WinEHPrepare::prepareExceptionHandlers(
Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
// These containers are used to re-map frame variables that are used in
// outlined catch and cleanup handlers. They will be populated as the
// handlers are outlined.
FrameVarInfoMap FrameVarInfo;
- SmallVector<CallInst *, 4> HandlerAllocs;
- SmallVector<AllocaInst *, 4> HandlerEHObjPtrs;
bool HandlersOutlined = false;
+ Module *M = F.getParent();
+ LLVMContext &Context = M->getContext();
+
+ // Create a new function to receive the handler contents.
+ PointerType *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *Int32Type = Type::getInt32Ty(Context);
+ Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions);
+
for (LandingPadInst *LPad : LPads) {
// Look for evidence that this landingpad has already been processed.
bool LPadHasActionList = false;
BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : LPadBB->getInstList()) {
- // FIXME: Make this an intrinsic.
- if (auto *Call = dyn_cast<CallInst>(&Inst))
- if (Call->getCalledFunction()->getName() == "llvm.eh.actions") {
+ for (Instruction &Inst : *LPadBB) {
+ if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&Inst)) {
+ if (IntrinCall->getIntrinsicID() == Intrinsic::eh_actions) {
LPadHasActionList = true;
break;
}
+ }
+ // FIXME: This is here to help with the development of nested landing pad
+ // outlining. It should be removed when that is finished.
+ if (isa<UnreachableInst>(Inst)) {
+ LPadHasActionList = true;
+ break;
+ }
}
// If we've already outlined the handlers for this landingpad,
@@ -206,177 +422,244 @@ bool WinEHPrepare::prepareCPPEHHandlers(
if (LPadHasActionList)
continue;
- for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses;
- ++Idx) {
- if (LPad->isCatch(Idx)) {
- // Create a new instance of the handler data structure in the
- // HandlerData vector.
- CallInst *EHAlloc = nullptr;
- AllocaInst *EHObjPtr = nullptr;
- bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad,
- EHAlloc, EHObjPtr, FrameVarInfo);
- if (Outlined) {
+ LandingPadActions Actions;
+ mapLandingPadBlocks(LPad, Actions);
+
+ for (ActionHandler *Action : Actions) {
+ if (Action->hasBeenProcessed())
+ continue;
+ BasicBlock *StartBB = Action->getStartBlock();
+
+ // SEH doesn't do any outlining for catches. Instead, pass the handler
+ // basic block addr to llvm.eh.actions and list the block as a return
+ // target.
+ if (isAsynchronousEHPersonality(Personality)) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ processSEHCatchHandler(CatchAction, StartBB);
HandlersOutlined = true;
- // These values must be resolved after all handlers have been
- // outlined.
- if (EHAlloc)
- HandlerAllocs.push_back(EHAlloc);
- if (EHObjPtr)
- HandlerEHObjPtrs.push_back(EHObjPtr);
+ continue;
}
- } // End if (isCatch)
- } // End for each clause
- } // End for each landingpad
+ }
+
+ if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) {
+ HandlersOutlined = true;
+ }
+ } // End for each Action
+
+ // FIXME: We need a guard against partially outlined functions.
+ if (!HandlersOutlined)
+ continue;
+
+ // Replace the landing pad with a new llvm.eh.action based landing pad.
+ BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB);
+ assert(!isa<PHINode>(LPadBB->begin()));
+ Instruction *NewLPad = LPad->clone();
+ NewLPadBB->getInstList().push_back(NewLPad);
+ while (!pred_empty(LPadBB)) {
+ auto *pred = *pred_begin(LPadBB);
+ InvokeInst *Invoke = cast<InvokeInst>(pred->getTerminator());
+ Invoke->setUnwindDest(NewLPadBB);
+ }
+
+ // Replace uses of the old lpad in phis with this block and delete the old
+ // block.
+ LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB);
+ LPadBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(LPadBB->getContext(), LPadBB);
+
+ // Add a call to describe the actions for this landing pad.
+ std::vector<Value *> ActionArgs;
+ for (ActionHandler *Action : Actions) {
+ // Action codes from docs are: 0 cleanup, 1 catch.
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
+ ActionArgs.push_back(CatchAction->getSelector());
+ Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
+ if (EHObj)
+ ActionArgs.push_back(EHObj);
+ else
+ ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType));
+ } else {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
+ }
+ ActionArgs.push_back(Action->getHandlerBlockOrFunc());
+ }
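+    // For illustration only (function and value names assumed, not taken
+    // from a real test case), a landing pad with one catch and one cleanup
+    // would end up with a call roughly like this:
+    //   %recover = call i8* (...)* @llvm.eh.actions(
+    //       i32 1, i8* bitcast (i8** @typeinfo to i8*), i8** %e.addr,
+    //       i8* (i8*, i8*)* @parent.catch,
+    //       i32 0, void (i8*, i8*)* @parent.cleanup)
+    //   indirectbr i8* %recover, [label %try.cont]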
+ CallInst *Recover =
+ CallInst::Create(ActionIntrin, ActionArgs, "recover", NewLPadBB);
+
+ // Add an indirect branch listing possible successors of the catch handlers.
+ IndirectBrInst *Branch = IndirectBrInst::Create(Recover, 0, NewLPadBB);
+ for (ActionHandler *Action : Actions) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ for (auto *Target : CatchAction->getReturnTargets()) {
+ Branch->addDestination(Target);
+ }
+ }
+ }
+ } // End for each landingpad
// If nothing got outlined, there is no more processing to be done.
if (!HandlersOutlined)
return false;
- // FIXME: We will replace the landingpad bodies with llvm.eh.actions
- // calls and indirect branches here and then delete blocks
- // which are no longer reachable. That will get rid of the
- // handlers that we have outlined. There is code below
- // that looks for allocas with no uses in the parent function.
- // That will only happen after the pruning is implemented.
-
- // Remap the frame variables.
- SmallVector<Type *, 2> StructTys;
- StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state
- StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object
-
- // Start the index at two since we always have the above fields at 0 and 1.
- int Idx = 2;
-
- // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment
- // and add padding as necessary to provide the proper alignment.
-
- // Map the alloca instructions to the corresponding index in the
- // frame allocation structure. If any alloca is used only in a single
- // handler and is not used in the parent frame after outlining, it will
- // be assigned an index of -1, meaning the handler can keep its
- // "temporary" alloca and the original alloca can be erased from the
- // parent function. If we later encounter this alloca in a second
- // handler, we will assign it a place in the frame allocation structure
- // at that time. Since the instruction replacement doesn't happen until
- // all the entries in the HandlerData have been processed this isn't a
- // problem.
- for (auto &VarInfoEntry : FrameVarInfo) {
- AllocaInst *ParentAlloca = VarInfoEntry.first;
- HandlerAllocas &AllocaInfo = VarInfoEntry.second;
-
- // If the instruction still has uses in the parent function or if it is
- // referenced by more than one handler, add it to the frame allocation
- // structure.
- if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) {
- Type *VarTy = ParentAlloca->getAllocatedType();
- StructTys.push_back(VarTy);
- AllocaInfo.ParentFrameAllocationIndex = Idx++;
- } else {
- // If the variable is not used in the parent frame and it is only used
- // in one handler, the alloca can be removed from the parent frame
- // and the handler will keep its "temporary" alloca to define the value.
- // An element index of -1 is used to indicate this condition.
- AllocaInfo.ParentFrameAllocationIndex = -1;
- }
- }
+ // Delete any blocks that were only used by handlers that were outlined above.
+ removeUnreachableBlocks(F);
- // Having filled the StructTys vector and assigned an index to each element,
- // we can now create the structure.
- StructType *EHDataStructTy = StructType::create(
- F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata");
- IRBuilder<> Builder(F.getParent()->getContext());
-
- // Create a frame allocation.
- Module *M = F.getParent();
- LLVMContext &Context = M->getContext();
BasicBlock *Entry = &F.getEntryBlock();
+ IRBuilder<> Builder(F.getParent()->getContext());
Builder.SetInsertPoint(Entry->getFirstInsertionPt());
- Function *FrameAllocFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameallocate);
- uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy);
- Value *FrameAllocArgs[] = {
- ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)};
- CallInst *FrameAlloc =
- Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc");
-
- Value *FrameEHData = Builder.CreateBitCast(
- FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data");
-
- // Now visit each handler that is using the structure and bitcast its EHAlloc
- // value to be a pointer to the frame alloc structure.
- DenseMap<Function *, Value *> EHDataMap;
- for (CallInst *EHAlloc : HandlerAllocs) {
- // The EHAlloc has no uses at this time, so we need to just insert the
- // cast before the next instruction. There is always a next instruction.
- BasicBlock::iterator II = EHAlloc;
- ++II;
- Builder.SetInsertPoint(cast<Instruction>(II));
- Value *EHData = Builder.CreateBitCast(
- EHAlloc, EHDataStructTy->getPointerTo(), "eh.data");
- EHDataMap[EHAlloc->getParent()->getParent()] = EHData;
- }
- // Next, replace the place-holder EHObjPtr allocas with GEP instructions
- // that pull the EHObjPtr from the frame alloc structure
- for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) {
- Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()];
- Builder.SetInsertPoint(EHObjPtr);
- Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1);
- EHObjPtr->replaceAllUsesWith(ElementPtr);
- EHObjPtr->removeFromParent();
- ElementPtr->takeName(EHObjPtr);
- delete EHObjPtr;
- }
+ Function *FrameEscapeFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameescape);
+ Function *RecoverFrameFn =
+ Intrinsic::getDeclaration(M, Intrinsic::framerecover);
// Finally, replace all of the temporary allocas for frame variables used in
- // the outlined handlers and the original frame allocas with GEP instructions
- // that get the equivalent pointer from the frame allocation struct.
+ // the outlined handlers with calls to llvm.framerecover.
+ BasicBlock::iterator II = Entry->getFirstInsertionPt();
+ Instruction *AllocaInsertPt = II;
+ SmallVector<Value *, 8> AllocasToEscape;
for (auto &VarInfoEntry : FrameVarInfo) {
- AllocaInst *ParentAlloca = VarInfoEntry.first;
- HandlerAllocas &AllocaInfo = VarInfoEntry.second;
- int Idx = AllocaInfo.ParentFrameAllocationIndex;
-
- // If we have an index of -1 for this instruction, it means it isn't used
- // outside of this handler. In that case, we just keep the "temporary"
- // alloca in the handler and erase the original alloca from the parent.
- if (Idx == -1) {
+ Value *ParentVal = VarInfoEntry.first;
+ TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
+
+ // If the mapped value isn't already an alloca, we need to spill it if it
+ // is a computed value or copy it if it is an argument.
+ AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal);
+ if (!ParentAlloca) {
+ if (auto *Arg = dyn_cast<Argument>(ParentVal)) {
+ // Lower this argument to a copy and then demote that to the stack.
+ // We can't just use the argument location because the handler needs
+ // it to be in the frame allocation block.
+        // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction.
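+        // As a sketch (names assumed), for an i32 argument %arg this creates
+        //   %arg.tmp = select i1 true, i32 %arg, i32 undef
+        // after which DemoteRegToStack turns %arg.tmp into a stack slot with
+        // a store in the entry block and a reload at each use.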
+ Value *TrueValue = ConstantInt::getTrue(Context);
+ Value *UndefValue = UndefValue::get(Arg->getType());
+ Instruction *SI =
+ SelectInst::Create(TrueValue, Arg, UndefValue,
+ Arg->getName() + ".tmp", AllocaInsertPt);
+ Arg->replaceAllUsesWith(SI);
+ // Reset the select operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, Arg);
+ ParentAlloca = DemoteRegToStack(*SI, true, SI);
+ } else if (auto *PN = dyn_cast<PHINode>(ParentVal)) {
+ ParentAlloca = DemotePHIToStack(PN, AllocaInsertPt);
+ } else {
+ Instruction *ParentInst = cast<Instruction>(ParentVal);
+ // FIXME: This is a work-around to temporarily handle the case where an
+ // instruction that is only used in handlers is not sunk.
+ // Without uses, DemoteRegToStack would just eliminate the value.
+ // This will fail if ParentInst is an invoke.
+ if (ParentInst->getNumUses() == 0) {
+ BasicBlock::iterator InsertPt = ParentInst;
+ ++InsertPt;
+ ParentAlloca =
+ new AllocaInst(ParentInst->getType(), nullptr,
+ ParentInst->getName() + ".reg2mem", InsertPt);
+ new StoreInst(ParentInst, ParentAlloca, InsertPt);
+ } else {
+ ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst);
+ }
+ }
+ }
+
+ // If the parent alloca is no longer used and only one of the handlers used
+ // it, erase the parent and leave the copy in the outlined handler.
+ if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) {
ParentAlloca->eraseFromParent();
- } else {
- // Otherwise, we replace the parent alloca and all outlined allocas
- // which map to it with GEP instructions.
-
- // First replace the original alloca.
- Builder.SetInsertPoint(ParentAlloca);
- Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc());
- Value *ElementPtr =
- Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx);
- ParentAlloca->replaceAllUsesWith(ElementPtr);
- ParentAlloca->removeFromParent();
- ElementPtr->takeName(ParentAlloca);
- delete ParentAlloca;
-
- // Next replace all outlined allocas that are mapped to it.
- for (AllocaInst *TempAlloca : AllocaInfo.Allocas) {
- Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()];
- // FIXME: Sink this GEP into the blocks where it is used.
- Builder.SetInsertPoint(TempAlloca);
- Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
- ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx);
- TempAlloca->replaceAllUsesWith(ElementPtr);
- TempAlloca->removeFromParent();
- ElementPtr->takeName(TempAlloca);
- delete TempAlloca;
+ continue;
+ }
+
+ // Add this alloca to the list of things to escape.
+ AllocasToEscape.push_back(ParentAlloca);
+
+ // Next replace all outlined allocas that are mapped to it.
+ for (AllocaInst *TempAlloca : Allocas) {
+ Function *HandlerFn = TempAlloca->getParent()->getParent();
+ // FIXME: Sink this GEP into the blocks where it is used.
+ Builder.SetInsertPoint(TempAlloca);
+ Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
+ Value *RecoverArgs[] = {
+ Builder.CreateBitCast(&F, Int8PtrType, ""),
+ &(HandlerFn->getArgumentList().back()),
+ llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)};
+ Value *RecoveredAlloca = Builder.CreateCall(RecoverFrameFn, RecoverArgs);
+ // Add a pointer bitcast if the alloca wasn't an i8.
+ if (RecoveredAlloca->getType() != TempAlloca->getType()) {
+ RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8");
+ RecoveredAlloca =
+ Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType());
}
- } // end else of if (Idx == -1)
- } // End for each FrameVarInfo entry.
+ TempAlloca->replaceAllUsesWith(RecoveredAlloca);
+ TempAlloca->removeFromParent();
+ RecoveredAlloca->takeName(TempAlloca);
+ delete TempAlloca;
+ }
+ } // End for each FrameVarInfo entry.
+
+ // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry
+ // block.
+ Builder.SetInsertPoint(&F.getEntryBlock().back());
+ Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
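+  // Taken together, the escape/recover pairing looks roughly like this
+  // (a sketch; names and types assumed). In the parent's entry block:
+  //   call void (...)* @llvm.frameescape(i32* %state, i8** %obj)
+  // and in an outlined handler, recovering escaped slot 0:
+  //   %r = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*),
+  //                                    i8* %frame_pointer, i32 0)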
+
+ // Insert an alloca for the EH state in the entry block. On x86, we will also
+ // insert stores to update the EH state, but on other ISAs, the runtime does
+ // it for us.
+ // FIXME: This record is different on x86.
+ Type *UnwindHelpTy = Type::getInt64Ty(Context);
+ AllocaInst *UnwindHelp =
+ new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front());
+ Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp);
+ Function *UnwindHelpFn =
+ Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp);
+ Builder.CreateCall(UnwindHelpFn,
+ Builder.CreateBitCast(UnwindHelp, Int8PtrType));
+
+  // Clean up the handler action maps we created for this function.
+ DeleteContainerSeconds(CatchHandlerMap);
+ CatchHandlerMap.clear();
+ DeleteContainerSeconds(CleanupHandlerMap);
+ CleanupHandlerMap.clear();
return HandlersOutlined;
}
-bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
- LandingPadInst *LPad, CallInst *&EHAlloc,
- AllocaInst *&EHObjPtr,
- FrameVarInfoMap &VarInfo) {
+// This function examines a block to determine whether the block ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// This function is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
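+// As a sketch, the dispatch shape being matched is (names assumed):
+//   %typeid = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+//   %matches = icmp eq i32 %sel, %typeid
+//   br i1 %matches, label %catch, label %next
+// with the reversed icmp ne form handled by swapping the branch targets.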
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB) {
+ ICmpInst::Predicate Pred;
+ BasicBlock *TBB, *FBB;
+ Value *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+ return false;
+
+ if (!match(LHS,
+ m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+ !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+ return false;
+
+ if (Pred == CmpInst::ICMP_EQ) {
+ CatchHandler = TBB;
+ NextBB = FBB;
+ return true;
+ }
+
+ if (Pred == CmpInst::ICMP_NE) {
+ CatchHandler = FBB;
+ NextBB = TBB;
+ return true;
+ }
+
+ return false;
+}
+
+bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo) {
Module *M = SrcFn->getParent();
LLVMContext &Context = M->getContext();
@@ -385,133 +668,241 @@ bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
std::vector<Type *> ArgTys;
ArgTys.push_back(Int8PtrType);
ArgTys.push_back(Int8PtrType);
- FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
- Function *CatchHandler = Function::Create(
- FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M);
+ Function *Handler;
+ if (Action->getType() == Catch) {
+ FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
+ Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+ SrcFn->getName() + ".catch", M);
+ } else {
+ FunctionType *FnType =
+ FunctionType::get(Type::getVoidTy(Context), ArgTys, false);
+ Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+ SrcFn->getName() + ".cleanup", M);
+ }
// Generate a standard prolog to setup the frame recovery structure.
IRBuilder<> Builder(Context);
- BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry");
- CatchHandler->getBasicBlockList().push_front(Entry);
+ BasicBlock *Entry = BasicBlock::Create(Context, "entry");
+ Handler->getBasicBlockList().push_front(Entry);
Builder.SetInsertPoint(Entry);
Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
- // The outlined handler will be called with the parent's frame pointer as
- // its second argument. To enable the handler to access variables from
- // the parent frame, we use that pointer to get locate a special block
- // of memory that was allocated using llvm.eh.allocateframe for this
- // purpose. During the outlining process we will determine which frame
- // variables are used in handlers and create a structure that maps these
- // variables into the frame allocation block.
- //
- // The frame allocation block also contains an exception state variable
- // used by the runtime and a pointer to the exception object pointer
- // which will be filled in by the runtime for use in the handler.
- Function *RecoverFrameFn =
- Intrinsic::getDeclaration(M, Intrinsic::framerecover);
- Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""),
- &(CatchHandler->getArgumentList().back())};
- EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc");
-
- // This alloca is only temporary. We'll be replacing it once we know all the
- // frame variables that need to go in the frame allocation structure.
- EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr");
-
- // This will give us a raw pointer to the exception object, which
- // corresponds to the formal parameter of the catch statement. If the
- // handler uses this object, we will generate code during the outlining
- // process to cast the pointer to the appropriate type and deference it
- // as necessary. The un-outlined landing pad code represents the
- // exception object as the result of the llvm.eh.begincatch call.
- Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj");
+ std::unique_ptr<WinEHCloningDirectorBase> Director;
ValueToValueMapTy VMap;
- // FIXME: Map other values referenced in the filter handler.
-
- WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo);
+ LandingPadMap &LPadMap = LPadMaps[LPad];
+ if (!LPadMap.isInitialized())
+ LPadMap.mapLandingPad(LPad);
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ Constant *Sel = CatchAction->getSelector();
+ Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap));
+ LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1));
+ } else {
+ Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap));
+ }
SmallVector<ReturnInst *, 8> Returns;
- ClonedCodeInfo InlinedFunctionInfo;
+ ClonedCodeInfo OutlinedFunctionInfo;
+
+ // If the start block contains PHI nodes, we need to map them.
+ BasicBlock::iterator II = StartBB->begin();
+ while (auto *PN = dyn_cast<PHINode>(II)) {
+ bool Mapped = false;
+ // Look for PHI values that we have already mapped (such as the selector).
+ for (Value *Val : PN->incoming_values()) {
+ if (VMap.count(Val)) {
+ VMap[PN] = VMap[Val];
+ Mapped = true;
+ }
+ }
+ // If we didn't find a match for this value, map it as an undef.
+ if (!Mapped) {
+ VMap[PN] = UndefValue::get(PN->getType());
+ }
+ ++II;
+ }
- BasicBlock::iterator II = LPad;
+ // Skip over PHIs and, if applicable, landingpad instructions.
+ II = StartBB->getFirstInsertionPt();
- CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap,
+ CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
/*ModuleLevelChanges=*/false, Returns, "",
- &InlinedFunctionInfo,
- SrcFn->getParent()->getDataLayout(), &Director);
+ &OutlinedFunctionInfo, Director.get());
// Move all the instructions in the first cloned block into our entry block.
BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry));
Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
FirstClonedBB->eraseFromParent();
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ WinEHCatchDirector *CatchDirector =
+ reinterpret_cast<WinEHCatchDirector *>(Director.get());
+ CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
+ CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
+ }
+
+ Action->setHandlerBlockOrFunc(Handler);
+
return true;
}
-CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Intercept instructions which extract values from the landing pad aggregate.
- if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) {
- if (Extract->getAggregateOperand() == LPI) {
- assert(Extract->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) &&
- "Unexpected operation: extracting an unknown landing pad element");
-
- if (*(Extract->idx_begin()) == 0) {
- // Element 0 doesn't directly corresponds to anything in the WinEH
- // scheme.
- // It will be stored to a memory location, then later loaded and finally
- // the loaded value will be used as the argument to an
- // llvm.eh.begincatch
- // call. We're tracking it here so that we can skip the store and load.
- ExtractedEHPtr = Inst;
- } else {
- // Element 1 corresponds to the filter selector. We'll map it to 1 for
- // matching purposes, but it will also probably be stored to memory and
- // reloaded, so we need to track the instuction so that we can map the
- // loaded value too.
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- ExtractedSelector = Inst;
- }
-
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
- }
- // Other extract value instructions just get cloned.
- return CloningDirector::CloneInstruction;
+/// This BB must end in a selector dispatch. All we need to do is pass the
+/// handler block to llvm.eh.actions and list it as a possible indirectbr
+/// target.
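+/// As a sketch (names and types assumed), the action entry recorded for an
+/// SEH catch is
+///   i32 1, i8* bitcast (i32 (i8*, i8*)* @my_filter to i8*), i8* null,
+///   i8* blockaddress(@parent, %__except)
+/// and the handler block is also listed as an indirectbr successor.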
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+ BasicBlock *StartBB) {
+ BasicBlock *HandlerBB;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+ if (Res) {
+ // If this was EH dispatch, this must be a conditional branch to the handler
+ // block.
+ // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+ // leading to crashes if some optimization hoists stuff here.
+ assert(CatchAction->getSelector() && HandlerBB &&
+ "expected catch EH dispatch");
+ } else {
+ // This must be a catch-all. Split the block after the landingpad.
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+ HandlerBB =
+ StartBB->splitBasicBlock(StartBB->getFirstInsertionPt(), "catch.all");
}
+ CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+ TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+ CatchAction->setReturnTargets(Targets);
+}
- if (auto *Store = dyn_cast<StoreInst>(Inst)) {
- // Look for and suppress stores of the extracted landingpad values.
- const Value *StoredValue = Store->getValueOperand();
- if (StoredValue == ExtractedEHPtr) {
- EHPtrStoreAddr = Store->getPointerOperand();
- return CloningDirector::SkipInstruction;
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+ // Each instance of this class should only ever be used to map a single
+ // landing pad.
+ assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+ // If the landing pad has already been mapped, there's nothing more to do.
+ if (OriginLPad == LPad)
+ return;
+
+ OriginLPad = LPad;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts are often passed to store instructions.
+ // In unoptimized code the stored value will often be loaded and then stored
+ // again.
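+  // As a sketch, the boilerplate being mapped looks like this (names
+  // assumed):
+  //   %lp  = landingpad { i8*, i32 } personality ...
+  //   %exn = extractvalue { i8*, i32 } %lp, 0
+  //   %sel = extractvalue { i8*, i32 } %lp, 1
+  //   store i8* %exn, i8** %exn.slot
+  //   store i32 %sel, i32* %ehselector.slot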
+ for (auto *U : LPad->users()) {
+ const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+ if (!Extract)
+ continue;
+ assert(Extract->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ unsigned int Idx = *(Extract->idx_begin());
+ assert((Idx == 0 || Idx == 1) &&
+ "Unexpected operation: extracting an unknown landing pad element");
+ if (Idx == 0) {
+      // Element 0 doesn't directly correspond to anything in the WinEH
+      // scheme. It will be stored to a memory location, then later loaded,
+      // and finally the loaded value will be used as the argument to an
+      // llvm.eh.begincatch call. We're tracking it here so that we can skip
+      // the store and load.
+ ExtractedEHPtrs.push_back(Extract);
+ } else if (Idx == 1) {
+ // Element 1 corresponds to the filter selector. We'll map it to 1 for
+ // matching purposes, but it will also probably be stored to memory and
+ // reloaded, so we need to track the instuction so that we can map the
+ // loaded value too.
+ ExtractedSelectors.push_back(Extract);
}
- if (StoredValue == ExtractedSelector) {
- SelectorStoreAddr = Store->getPointerOperand();
- return CloningDirector::SkipInstruction;
+
+ // Look for stores of the extracted values.
+ for (auto *EU : Extract->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(EU)) {
+ if (Idx == 1) {
+ SelectorStores.push_back(Store);
+ SelectorStoreAddrs.push_back(Store->getPointerOperand());
+ } else {
+ EHPtrStores.push_back(Store);
+ EHPtrStoreAddrs.push_back(Store->getPointerOperand());
+ }
+ }
}
+ }
+}
- // Any other store just gets cloned.
- return CloningDirector::CloneInstruction;
+bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
+ if (Inst == OriginLPad)
+ return true;
+ for (auto *Extract : ExtractedEHPtrs) {
+ if (Inst == Extract)
+ return true;
+ }
+ for (auto *Extract : ExtractedSelectors) {
+ if (Inst == Extract)
+ return true;
+ }
+ for (auto *Store : EHPtrStores) {
+ if (Inst == Store)
+ return true;
+ }
+ for (auto *Store : SelectorStores) {
+ if (Inst == Store)
+ return true;
+ }
+
+ return false;
+}
+
+void LandingPadMap::remapSelector(ValueToValueMapTy &VMap,
+ Value *MappedValue) const {
+ // Remap all selector extract instructions to the specified value.
+ for (auto *Extract : ExtractedSelectors)
+ VMap[Extract] = MappedValue;
+}
+
+bool LandingPadMap::mapIfEHLoad(const LoadInst *Load,
+ SmallVectorImpl<const StoreInst *> &Stores,
+ SmallVectorImpl<const Value *> &StoreAddrs) {
+ // This makes the assumption that a store we've previously seen dominates
+ // this load instruction. That might seem like a rather huge assumption,
+  // but given the way that landingpads are constructed it's fairly safe.
+ // FIXME: Add debug/assert code that verifies this.
+ const Value *LoadAddr = Load->getPointerOperand();
+ for (auto *StoreAddr : StoreAddrs) {
+ if (LoadAddr == StoreAddr) {
+ // Handle the common debug scenario where this loaded value is stored
+ // to a different location.
+ for (auto *U : Load->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(U)) {
+ Stores.push_back(Store);
+ StoreAddrs.push_back(Store->getPointerOperand());
+ }
+ }
+ return true;
+ }
}
+ return false;
+}
+
+CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If this is one of the boilerplate landing pad instructions, skip it.
+ // The instruction will have already been remapped in VMap.
+ if (LPadMap.isLandingPadSpecificInst(Inst))
+ return CloningDirector::SkipInstruction;
if (auto *Load = dyn_cast<LoadInst>(Inst)) {
// Look for loads of (previously suppressed) landingpad values.
- // The EHPtr load can be ignored (it should only be used as
- // an argument to llvm.eh.begincatch), but the selector value
- // needs to be mapped to a constant value of 1 to be used to
- // simplify the branching to always flow to the current handler.
- const Value *LoadAddr = Load->getPointerOperand();
- if (LoadAddr == EHPtrStoreAddr) {
- VMap[Inst] = UndefValue::get(Int8PtrType);
+ // The EHPtr load can be mapped to an undef value as it should only be used
+ // as an argument to llvm.eh.begincatch, but the selector value needs to be
+ // mapped to a constant value of 1. This value will be used to simplify the
+ // branching to always flow to the current handler.
+ if (LPadMap.mapIfSelectorLoad(Load)) {
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
return CloningDirector::SkipInstruction;
}
- if (LoadAddr == SelectorStoreAddr) {
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ if (LPadMap.mapIfEHPtrLoad(Load)) {
+ VMap[Inst] = UndefValue::get(Int8PtrType);
return CloningDirector::SkipInstruction;
}
@@ -519,108 +910,576 @@ CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
return CloningDirector::CloneInstruction;
}
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) {
- // The argument to the call is some form of the first element of the
- // landingpad aggregate value, but that doesn't matter. It isn't used
- // here.
- // The return value of this instruction, however, is used to access the
- // EH object pointer. We have generated an instruction to get that value
- // from the EH alloc block, so we can just map to that here.
- VMap[Inst] = EHObj;
- return CloningDirector::SkipInstruction;
- }
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- // It might be interesting to track whether or not we are inside a catch
- // function, but that might make the algorithm more brittle than it needs
- // to be.
-
- // The end catch call can occur in one of two places: either in a
- // landingpad
- // block that is part of the catch handlers exception mechanism, or at the
- // end of the catch block. If it occurs in a landing pad, we must skip it
- // and continue so that the landing pad gets cloned.
- // FIXME: This case isn't fully supported yet and shouldn't turn up in any
- // of the test cases until it is.
- if (IntrinCall->getParent()->isLandingPad())
- return CloningDirector::SkipInstruction;
-
- // If an end catch occurs anywhere else the next instruction should be an
- // unconditional branch instruction that we want to replace with a return
- // to the the address of the branch target.
- const BasicBlock *EndCatchBB = IntrinCall->getParent();
- const TerminatorInst *Terminator = EndCatchBB->getTerminator();
- const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- assert(Branch && Branch->isUnconditional());
- assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
- BasicBlock::const_iterator(Branch));
-
- ReturnInst::Create(NewBB->getContext(),
- BlockAddress::get(Branch->getSuccessor(0)), NewBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
+ // Nested landing pads will be cloned as stubs, with just the
+ // landingpad instruction and an unreachable instruction. When
+ // all landingpads have been outlined, we'll replace this with the
+ // llvm.eh.actions call and indirect branch created when the
+ // landing pad was outlined.
+ if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) {
+ Instruction *NewInst = NestedLPad->clone();
+ if (NestedLPad->hasName())
+ NewInst->setName(NestedLPad->getName());
+ // FIXME: Store this mapping somewhere else also.
+ VMap[NestedLPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
return CloningDirector::StopCloningBB;
}
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector)
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- else
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
- }
+
+ if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
+ return handleInvoke(VMap, Invoke, NewBB);
+
+ if (auto *Resume = dyn_cast<ResumeInst>(Inst))
+ return handleResume(VMap, Resume, NewBB);
+
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return handleBeginCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ return handleEndCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ return handleTypeIdFor(VMap, Inst, NewBB);
// Continue with the default cloning behavior.
return CloningDirector::CloneInstruction;
}
+CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // The argument to the call is some form of the first element of the
+ // landingpad aggregate value, but that doesn't matter. It isn't used
+ // here.
+ // The second argument is an outparameter where the exception object will be
+ // stored. Typically the exception object is a scalar, but it can be an
+ // aggregate when catching by value.
+ // FIXME: Leave something behind to indicate where the exception object lives
+ // for this handler. Should it be part of llvm.eh.actions?
+ assert(ExceptionObjectVar == nullptr && "Multiple calls to "
+ "llvm.eh.begincatch found while "
+ "outlining catch handler.");
+ ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ // It might be interesting to track whether or not we are inside a catch
+ // function, but that might make the algorithm more brittle than it needs
+ // to be.
+
+ // The end catch call can occur in one of two places: either in a
+  // landingpad block that is part of the catch handler's exception mechanism,
+ // or at the end of the catch block. If it occurs in a landing pad, we must
+ // skip it and continue so that the landing pad gets cloned.
+ // FIXME: This case isn't fully supported yet and shouldn't turn up in any
+ // of the test cases until it is.
+ if (IntrinCall->getParent()->isLandingPad())
+ return CloningDirector::SkipInstruction;
+
+ // If an end catch occurs anywhere else the next instruction should be an
+ // unconditional branch instruction that we want to replace with a return
+  // to the address of the branch target.
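+  // Sketch (names assumed): when the original handler ends with
+  //   call void @llvm.eh.endcatch()
+  //   br label %try.cont
+  // the cloned block instead ends with
+  //   ret i8* blockaddress(@parent, %try.cont)
+  // and %try.cont is recorded as a target of the parent's indirectbr.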
+ const BasicBlock *EndCatchBB = IntrinCall->getParent();
+ const TerminatorInst *Terminator = EndCatchBB->getTerminator();
+ const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ assert(Branch && Branch->isUnconditional());
+ assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
+ BasicBlock::const_iterator(Branch));
+
+ BasicBlock *ContinueLabel = Branch->getSuccessor(0);
+ ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel),
+ NewBB);
+ ReturnTargets.push_back(ContinueLabel);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector)
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ else
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke, BasicBlock *NewBB) {
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume, BasicBlock *NewBB) {
+ // Resume instructions shouldn't be reachable from catch handlers.
+  // We still need to handle them, but the resulting code will be pruned.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Catch blocks within cleanup handlers will always be unreachable.
+ // We'll insert an unreachable instruction now, but it will be pruned
+ // before the cloning process is complete.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Catch blocks within cleanup handlers will always be unreachable.
+ // We'll insert an unreachable instruction now, but it will be pruned
+ // before the cloning process is complete.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If we encounter a selector comparison while cloning a cleanup handler,
+ // we want to stop cloning immediately. Anything after the dispatch
+ // will be outlined into a different handler.
+ BasicBlock *CatchHandler;
+ Constant *Selector;
+ BasicBlock *NextBB;
+ if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+ CatchHandler, Selector, NextBB)) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+ }
+  // If eh.typeid.for is called for any other reason, it can be ignored.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+ ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+ // All invokes in cleanup handlers can be replaced with calls.
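+  // For example (a sketch; names assumed),
+  //   invoke void @dtor(i8* %p) to label %cont unwind label %lpad
+  // becomes
+  //   call void @dtor(i8* %p)
+  //   br label %cont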
+ SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+ Invoke->getName(), NewBB);
+ NewCall->setCallingConv(Invoke->getCallingConv());
+ NewCall->setAttributes(Invoke->getAttributes());
+ NewCall->setDebugLoc(Invoke->getDebugLoc());
+ VMap[Invoke] = NewCall;
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+ // The unwind destination won't be cloned into the new function, so
+ // we don't need to clean up its phi nodes.
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+ ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
: FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
- // FIXME: Do something with the FrameVarMapped so that it is shared across the
- // function.
}
Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
- // If we're asked to materialize an alloca variable, we temporarily
- // create a matching alloca in the outlined function. When all the
- // outlining is complete, we'll collect these into a structure and
- // replace these temporary allocas with GEPs referencing the frame
- // allocation block.
+ // If we're asked to materialize a value that is an instruction, we
+ // temporarily create an alloca in the outlined function and add this
+ // to the FrameVarInfo map. When all the outlining is complete, we'll
+ // collect these into a structure, spilling non-alloca values in the
+ // parent frame as necessary, and replace these temporary allocas with
+ // GEPs referencing the frame allocation block.
+
+ // If the value is an alloca, the mapping is direct.
if (auto *AV = dyn_cast<AllocaInst>(V)) {
- AllocaInst *NewAlloca = Builder.CreateAlloca(
- AV->getAllocatedType(), AV->getArraySize(), AV->getName());
- FrameVarInfo[AV].Allocas.push_back(NewAlloca);
+ AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
+ Builder.Insert(NewAlloca, AV->getName());
+ FrameVarInfo[AV].push_back(NewAlloca);
return NewAlloca;
}
-// FIXME: Do PHI nodes need special handling?
+ // For other types of instructions or arguments, we need an alloca based on
+ // the value's type and a load of the alloca. The alloca will be replaced
+ // by a GEP, but the load will stay. In the parent function, the value will
+ // be spilled to a location in the frame allocation block.
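+  // Sketch (names assumed): for an i32 value %v this materializes
+  //   %eh.temp.alloca = alloca i32
+  //   %v.reload = load i32* %eh.temp.alloca
+  // where the alloca is later replaced with a framerecover-based pointer
+  // while the load remains.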
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ AllocaInst *NewAlloca =
+ Builder.CreateAlloca(V->getType(), nullptr, "eh.temp.alloca");
+ FrameVarInfo[V].push_back(NewAlloca);
+ LoadInst *NewLoad = Builder.CreateLoad(NewAlloca, V->getName() + ".reload");
+ return NewLoad;
+ }
-// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc.
+ // Don't materialize other values.
+ return nullptr;
+}
-// FIXME: This doesn't work during cloning because it finds an instruction
-// in the use list that isn't yet part of a basic block.
-#if 0
- // If we're asked to remap some other instruction, we'll need to
- // spill it to an alloca variable in the parent function and add a
- // temporary alloca in the outlined function to be processed as
- // described above.
- Instruction *Inst = dyn_cast<Instruction>(V);
- if (Inst) {
- AllocaInst *Spill = DemoteRegToStack(*Inst, true);
- AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(),
- Spill->getArraySize());
- FrameVarMap[AV] = NewAlloca;
- return NewAlloca;
+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// any arbitrary control flow, but all paths through the cleanup code must
+// eventually reach the next selector comparison, and no path can skip to a
+// different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+//
+// If the landingpad instruction does not have a catch clause, we will assume
+// that any instructions other than selector comparisons and catch handlers can
+// be ignored. In practice, these will only be the boilerplate instructions.
+//
+// The catch handlers may also have any control structure, but we are only
+// interested in the start of the catch handlers, so we don't need to actually
+// follow the flow of the catch handlers. The start of the catch handlers can
+// be located from the compare instructions, but they can be skipped in the
+// flow by following the contrary branch.
+void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
+ LandingPadActions &Actions) {
+ unsigned int NumClauses = LPad->getNumClauses();
+ unsigned int HandlersFound = 0;
+ BasicBlock *BB = LPad->getParent();
+
+ DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
+
+ if (NumClauses == 0) {
+ // This landing pad contains only cleanup code.
+ CleanupHandler *Action = new CleanupHandler(BB);
+ CleanupHandlerMap[BB] = Action;
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName()
+ << "\n");
+ assert(LPad->isCleanup());
+ return;
+ }
+
+ VisitedBlockSet VisitedBlocks;
+
+ while (HandlersFound != NumClauses) {
+ BasicBlock *NextBB = nullptr;
+
+ // See if the clause we're looking for is a catch-all.
+ // If so, the catch begins immediately.
+ if (isa<ConstantPointerNull>(LPad->getClause(HandlersFound))) {
+ // The catch all must occur last.
+ assert(HandlersFound == NumClauses - 1);
+
+ // For C++ EH, check if there is any interesting cleanup code before we
+ // begin the catch. This is important because cleanups cannot rethrow
+ // exceptions but code called from catches can. For SEH, it isn't
+ // important if some finally code before a catch-all is executed out of
+ // line or after recovering from the exception.
+ if (Personality == EHPersonality::MSVC_CXX) {
+ if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+ }
+
+ // Add the catch handler to the action list.
+ CatchHandler *Action =
+ new CatchHandler(BB, LPad->getClause(HandlersFound), nullptr);
+ CatchHandlerMap[BB] = Action;
+ Actions.insertCatchHandler(Action);
+ DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
+ ++HandlersFound;
+
+ // Once we reach a catch-all, don't expect to hit a resume instruction.
+ BB = nullptr;
+ break;
+ }
+
+ CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+ // See if there is any interesting code executed before the dispatch.
+ if (auto *CleanupAction =
+ findCleanupHandler(BB, CatchAction->getStartBlock())) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+
+ assert(CatchAction);
+ ++HandlersFound;
+
+ // Add the catch handler to the action list.
+ Actions.insertCatchHandler(CatchAction);
+ DEBUG(dbgs() << " Found catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+
+ // Move on to the block after the catch handler.
+ BB = NextBB;
+ }
+
+ // If we didn't wind up in a catch-all, see if there is any interesting code
+ // executed before the resume.
+ if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+
+  // It's possible that some optimization moved code into a landingpad that
+  // wasn't previously being used for cleanup. If that happens, we need to
+  // execute that extra code from a cleanup handler.
+ if (Actions.includesCleanup() && !LPad->isCleanup())
+ LPad->setCleanup(true);
+}
+
+// This function searches starting with the input block for the next
+// block that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+ BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks) {
+  // See if we've already found a catch handler for this block; if so, use it.
+ // Call count() first to avoid creating a null entry for blocks
+ // we haven't seen before.
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+ NextBB = Action->getNextBB();
+ return Action;
}
-#endif
+ // VisitedBlocks applies only to the current search. We still
+ // need to consider blocks that we've visited while mapping other
+ // landing pads.
+ VisitedBlocks.insert(BB);
+
+ BasicBlock *CatchBlock = nullptr;
+ Constant *Selector = nullptr;
+
+ // If this is the first time we've visited this block from any landing pad
+ // look to see if it is a selector dispatch block.
+ if (!CatchHandlerMap.count(BB)) {
+ if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+ CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+ CatchHandlerMap[BB] = Action;
+ return Action;
+ }
+ }
+
+ // Visit each successor, looking for the dispatch.
+ // FIXME: We expect to find the dispatch quickly, so this will probably
+ // work better as a breadth first search.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+
+ CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+ if (Action)
+ return Action;
+ }
+ return nullptr;
+}
+
+// Helper that factors out repeated code from findCleanupHandler.
+static CleanupHandler *
+createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, BasicBlock *BB) {
+ CleanupHandler *Action = new CleanupHandler(BB);
+ CleanupHandlerMap[BB] = Action;
+ return Action;
+}
+
+// This function searches starting with the input block for the next block that
+// contains code that is not part of a catch handler and would not be eliminated
+// during handler outlining.
+//
+CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
+ BasicBlock *EndBB) {
+ // Here we will skip over the following:
+ //
+ // landing pad prolog:
+ //
+ // Unconditional branches
+ //
+ // Selector dispatch
+ //
+ // Resume pattern
+ //
+ // Anything else marks the start of an interesting block
+
+ BasicBlock *BB = StartBB;
+ // Anything other than an unconditional branch will kick us out of this loop
+ // one way or another.
+ while (BB) {
+ // If we've already scanned this block, don't scan it again. If it is
+ // a cleanup block, there will be an action in the CleanupHandlerMap.
+ // If we've scanned it and it is not a cleanup block, there will be a
+ // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+ // be no entry in the CleanupHandlerMap. We must call count() first to
+ // avoid creating a null entry for blocks we haven't scanned.
+ if (CleanupHandlerMap.count(BB)) {
+ if (auto *Action = CleanupHandlerMap[BB]) {
+ return cast<CleanupHandler>(Action);
+ } else {
+ // Here we handle the case where the cleanup handler map contains a
+ // value for this block but the value is a nullptr. This means that
+ // we have previously analyzed the block and determined that it did
+ // not contain any cleanup code. Based on the earlier analysis, we
+        // know that the block must end in either an unconditional branch, a
+ // resume or a conditional branch that is predicated on a comparison
+ // with a selector. Either the resume or the selector dispatch
+ // would terminate the search for cleanup code, so the unconditional
+ // branch is the only case for which we might need to continue
+ // searching.
+ if (BB == EndBB)
+ return nullptr;
+ BasicBlock *SuccBB;
+ if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ return nullptr;
+ BB = SuccBB;
+ continue;
+ }
+ }
+
+    // Create an entry in the cleanup handler map for this block. Initially
+    // we create an entry that says this isn't a cleanup block. If we find
+    // cleanup code below, createCleanupHandler will replace this entry.
+ CleanupHandlerMap[BB] = nullptr;
+
+ TerminatorInst *Terminator = BB->getTerminator();
+
+ // Landing pad blocks have extra instructions we need to accept.
+ LandingPadMap *LPadMap = nullptr;
+ if (BB->isLandingPad()) {
+ LandingPadInst *LPad = BB->getLandingPadInst();
+ LPadMap = &LPadMaps[LPad];
+ if (!LPadMap->isInitialized())
+ LPadMap->mapLandingPad(LPad);
+ }
+
+ // Look for the bare resume pattern:
+ // %exn2 = load i8** %exn.slot
+ // %sel2 = load i32* %ehselector.slot
+ // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0
+ // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1
+ // resume { i8*, i32 } %lpad.val2
+ if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+ InsertValueInst *Insert1 = nullptr;
+ InsertValueInst *Insert2 = nullptr;
+ Value *ResumeVal = Resume->getOperand(0);
+ // If there is only one landingpad, we may use the lpad directly with no
+ // insertions.
+ if (isa<LandingPadInst>(ResumeVal))
+ return nullptr;
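+      // If the resume value is a PHI, the insertvalue instructions that feed
+      // it live in the predecessor blocks, so there is nothing to peel back
+      // here. Otherwise, walk back through the two insertvalues that build
+      // the { i8*, i32 } pair being resumed.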
+ if (!isa<PHINode>(ResumeVal)) {
+ Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+ if (!Insert2)
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+ if (!Insert1)
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
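+      // Scan the block: aside from the landing pad prolog, the insertvalues,
+      // and the resume itself, every instruction must have its only use in
+      // one of the insertvalues; anything else is interesting cleanup code.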
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+ continue;
+ if (!Inst->hasOneUse() ||
+ (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ }
+ return nullptr;
+ }
+
+    if (BranchInst *Branch = dyn_cast<BranchInst>(Terminator)) {
+ if (Branch->isConditional()) {
+ // Look for the selector dispatch.
+ // %sel = load i32* %ehselector.slot
+ // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+ // %matches = icmp eq i32 %sel12, %2
+ // br i1 %matches, label %catch14, label %eh.resume
+ CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+ if (!Compare || !Compare->isEquality())
+ return createCleanupHandler(CleanupHandlerMap, BB);
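+        // Check that the block contains only instructions needed to compute
+        // the selector comparison: loads of the selector slot and the
+        // llvm.eh.typeid.for call. Anything else is cleanup code.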
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+ IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Compare || Inst == Branch)
+ continue;
+ if (!Inst->hasOneUse() || (Inst->user_back() != Compare))
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ continue;
+ if (!isa<LoadInst>(Inst))
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ // The selector dispatch block should always terminate our search.
+ assert(BB == EndBB);
+ return nullptr;
+      } else {
+        // The branch is unconditional. Look for effectively empty blocks:
+        // only the landing pad prolog, llvm.eh.endcatch calls, and the
+        // branch itself are allowed here.
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+ IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Branch)
+ continue;
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ continue;
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ if (BB == EndBB)
+ return nullptr;
+        // Keep searching through the unconditional branch's successor.
+        BB = Branch->getSuccessor(0);
+ continue;
+ } // End else of if branch was conditional
+ } // End if Branch
+
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
return nullptr;
}