aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/Analysis.cpp11
-rw-r--r--lib/CodeGen/Android.mk2
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/Android.mk66
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp102
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp79
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp161
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h134
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp62
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h2
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp (renamed from lib/CodeGen/AsmPrinter/DwarfException.cpp)99
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h138
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp26
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp2
-rw-r--r--lib/CodeGen/AtomicExpandLoadLinkedPass.cpp133
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp23
-rw-r--r--lib/CodeGen/BranchFolding.cpp15
-rw-r--r--lib/CodeGen/CMakeLists.txt2
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp22
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp51
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h6
-rw-r--r--lib/CodeGen/GlobalMerge.cpp361
-rw-r--r--lib/CodeGen/JumpInstrTables.cpp301
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp12
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp15
-rw-r--r--lib/CodeGen/LiveDebugVariables.h3
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp27
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp45
-rw-r--r--lib/CodeGen/MachineScheduler.cpp336
-rw-r--r--lib/CodeGen/Passes.cpp16
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp381
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp87
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp22
-rw-r--r--lib/CodeGen/RegisterPressure.cpp5
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp529
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp158
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp154
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp91
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp11
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp22
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp278
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp135
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp389
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp318
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp51
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp41
-rw-r--r--lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp4
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp20
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp2
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp110
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp170
62 files changed, 3790 insertions, 1544 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 6fc83a2..1bdf312 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines several CodeGen-specific LLVM IR analysis utilties.
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,
- const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,7 +490,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
!isa<UnreachableInst>(Term)))
return false;
@@ -509,7 +509,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
return false;
}
- return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+ return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
+ *DAG.getTarget().getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 7feb42c..05e5c45 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -24,11 +24,13 @@ codegen_SRC_FILES := \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCStrategy.cpp \
+ GlobalMerge.cpp \
IfConversion.cpp \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
JITCodeEmitter.cpp \
+ JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
LiveDebugVariables.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1cb0159..251f5ef 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -37,8 +37,7 @@
using namespace llvm;
ARMException::ARMException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitCFI(false) {}
+ : EHStreamer(A), shouldEmitCFI(false) {}
ARMException::~ARMException() {}
@@ -100,7 +99,7 @@ void ARMException::endFunction(const MachineFunction *) {
ATS.emitHandlerData();
// Emit actual exception table
- EmitExceptionTable();
+ emitExceptionTable();
}
}
@@ -108,7 +107,7 @@ void ARMException::endFunction(const MachineFunction *) {
ATS.emitFnEnd();
}
-void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index f56eb6e..083cc0d 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -1,33 +1,33 @@
LOCAL_PATH := $(call my-dir)
codegen_asmprinter_SRC_FILES := \
- AsmPrinter.cpp
+ AddressPool.cpp \
+ ARMException.cpp \
+ AsmPrinter.cpp \
+ AsmPrinterDwarf.cpp \
+ AsmPrinterInlineAsm.cpp \
+ DbgValueHistoryCalculator.cpp \
+ DIE.cpp \
+ DIEHash.cpp \
+ DwarfAccelTable.cpp \
+ DwarfCFIException.cpp \
+ DwarfDebug.cpp \
+ DwarfFile.cpp \
+ DwarfStringPool.cpp \
+ DwarfUnit.cpp \
+ EHStreamer.cpp \
+ ErlangGCPrinter.cpp \
+ OcamlGCPrinter.cpp \
+ Win64Exception.cpp \
+ WinCodeViewLineTables.cpp
+
+
# For the host
# =====================================================
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- AddressPool.cpp \
- AsmPrinter.cpp \
- AsmPrinterDwarf.cpp \
- AsmPrinterInlineAsm.cpp \
- ARMException.cpp \
- DbgValueHistoryCalculator.cpp \
- DIE.cpp \
- DIEHash.cpp \
- DwarfAccelTable.cpp \
- DwarfCFIException.cpp \
- DwarfDebug.cpp \
- DwarfException.cpp \
- DwarfFile.cpp \
- DwarfStringPool.cpp \
- DwarfUnit.cpp \
- ErlangGCPrinter.cpp \
- OcamlGCPrinter.cpp \
- Win64Exception.cpp \
- WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
LOCAL_MODULE:= libLLVMAsmPrinter
LOCAL_MODULE_TAGS := optional
@@ -41,27 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- AddressPool.cpp \
- AsmPrinter.cpp \
- AsmPrinterDwarf.cpp \
- AsmPrinterInlineAsm.cpp \
- ARMException.cpp \
- DbgValueHistoryCalculator.cpp \
- DIE.cpp \
- DIEHash.cpp \
- DwarfAccelTable.cpp \
- DwarfCFIException.cpp \
- DwarfDebug.cpp \
- DwarfException.cpp \
- DwarfFile.cpp \
- DwarfStringPool.cpp \
- DwarfUnit.cpp \
- ErlangGCPrinter.cpp \
- OcamlGCPrinter.cpp \
- Win64Exception.cpp \
- WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
LOCAL_MODULE:= libLLVMAsmPrinter
LOCAL_MODULE_TAGS := optional
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7de9c6d..f80fdea 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -46,7 +47,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -232,23 +232,23 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
- DwarfException *DE = nullptr;
+ EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
break;
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
- DE = new DwarfCFIException(this);
+ ES = new DwarfCFIException(this);
break;
case ExceptionHandling::ARM:
- DE = new ARMException(this);
+ ES = new ARMException(this);
break;
- case ExceptionHandling::Win64:
- DE = new Win64Exception(this);
+ case ExceptionHandling::WinEH:
+ ES = new Win64Exception(this);
break;
}
- if (DE)
- Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName));
+ if (ES)
+ Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
return false;
}
@@ -709,13 +709,12 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
MF->getFunction()->needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
- ExceptionHandling::ExceptionsType ExceptionHandlingType =
- MAI->getExceptionHandlingType();
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
ExceptionHandlingType != ExceptionHandling::ARM)
return;
@@ -870,6 +869,8 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.AddBlankLine();
}
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
+
bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
for (const auto &G : M.globals())
@@ -887,6 +888,54 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Get information about jump-instruction tables to print.
+ JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
+
+ if (JITI && !JITI->getTables().empty()) {
+ unsigned Arch = Triple(getTargetTriple()).getArch();
+ bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
+ MCInst TrapInst;
+ TM.getInstrInfo()->getTrap(TrapInst);
+ for (const auto &KV : JITI->getTables()) {
+ uint64_t Count = 0;
+ for (const auto &FunPair : KV.second) {
+ // Emit the function labels to make this be a function entry point.
+ MCSymbol *FunSym =
+ OutContext.GetOrCreateSymbol(FunPair.second->getName());
+ OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
+ // FIXME: JumpTableInstrInfo should store information about the required
+ // alignment of table entries and the size of the padding instruction.
+ EmitAlignment(3);
+ if (IsThumb)
+ OutStreamer.EmitThumbFunc(FunSym);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
+ OutStreamer.EmitLabel(FunSym);
+
+ // Emit the jump instruction to transfer control to the original
+ // function.
+ MCInst JumpToFun;
+ MCSymbol *TargetSymbol =
+ OutContext.GetOrCreateSymbol(FunPair.first->getName());
+ const MCSymbolRefExpr *TargetSymRef =
+ MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
+ OutContext);
+ TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+ OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
+ ++Count;
+ }
+
+ // Emit enough padding instructions to fill up to the next power of two.
+ // This assumes that the trap instruction takes 8 bytes or fewer.
+ uint64_t Remaining = NextPowerOf2(Count) - Count;
+ for (uint64_t C = 0; C < Remaining; ++C) {
+ EmitAlignment(3);
+ OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
+ }
+
+ }
+ }
+
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
@@ -932,10 +981,6 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &Alias : M.aliases()) {
MCSymbol *Name = getSymbol(&Alias);
- const GlobalValue *GV = Alias.getAliasee();
- assert(!GV->isDeclaration());
- MCSymbol *Target = getSymbol(GV);
-
if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
@@ -947,7 +992,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit the directives as assignments aka .set:
OutStreamer.EmitAssignment(Name,
- MCSymbolRefExpr::Create(Target, OutContext));
+ lowerConstant(Alias.getAliasee(), *this));
}
}
@@ -1248,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
// Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (GV->getSection() == "llvm.metadata" ||
+ if (StringRef(GV->getSection()) == "llvm.metadata" ||
GV->hasAvailableExternallyLinkage())
return true;
@@ -1350,14 +1395,17 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
- const MCSection *KeySec = nullptr;
- if (S.ComdatKey) {
- KeySym = getSymbol(S.ComdatKey);
- KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM);
+ if (GlobalValue *GV = S.ComdatKey) {
+ if (GV->hasAvailableExternallyLinkage())
+ // If the associated variable is available_externally, some other TU
+ // will provide its dynamic initializer.
+ continue;
+
+ KeySym = getSymbol(GV);
}
const MCSection *OutputSection =
- (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec)
- : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec));
+ (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ : Obj.getStaticDtorSection(S.Priority, KeySym));
OutStreamer.SwitchSection(OutputSection);
if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
EmitAlignment(Align);
@@ -1817,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
SmallString<8> StrVal;
CFP->getValueAPF().toString(StrVal);
- CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ if (CFP->getType())
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ else
+ AP.OutStreamer.GetCommentOS() << "Printing <null> Type";
AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
}
@@ -1830,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ if (AP.TM.getDataLayout()->isBigEndian() &&
+ !CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index b4ef185..f555f21 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -10,10 +10,10 @@ add_llvm_library(LLVMAsmPrinter
DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfDebug.cpp
- DwarfException.cpp
DwarfFile.cpp
DwarfStringPool.cpp
DwarfUnit.cpp
+ EHStreamer.cpp
ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
Win64Exception.cpp
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 6103254..a66d08e 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -15,6 +15,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
+#include <set>
#define DEBUG_TYPE "dwarfdebug"
@@ -110,45 +111,73 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
RegVars.erase(I);
}
-// \brief Terminate location ranges for all variables, described by registers
-// clobbered by @MI.
-static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
- const MachineInstr &MI,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &HistMap) {
+// \brief Collect all registers clobbered by @MI and insert them to @Regs.
+static void collectClobberedRegisters(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ std::set<unsigned> &Regs) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg())
continue;
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI) {
- unsigned RegNo = *AI;
- clobberRegisterUses(RegVars, RegNo, HistMap, MI);
- }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Regs.insert(*AI);
}
}
-// \brief Terminate the location range for all register-described variables
-// by inserting @ClobberingInstr to their history.
-static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars,
- DbgValueHistoryMap &HistMap,
- const MachineInstr &ClobberingInstr) {
- for (const auto &I : RegVars)
- for (const auto &Var : I.second)
- HistMap.endInstrRange(Var, ClobberingInstr);
- RegVars.clear();
+// \brief Returns the first instruction in @MBB which corresponds to
+// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
+ auto LastMI = MBB.getLastNonDebugInstr();
+ if (LastMI == MBB.end() || !LastMI->isReturn())
+ return nullptr;
+ // Assume that epilogue starts with instruction having the same debug location
+ // as the return instruction.
+ DebugLoc LastLoc = LastMI->getDebugLoc();
+ auto Res = LastMI;
+ for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
+ ++I) {
+ if (I->getDebugLoc() != LastLoc)
+ return Res;
+ Res = std::prev(I.base());
+ }
+ // If all instructions have the same debug location, assume whole MBB is
+ // an epilogue.
+ return MBB.begin();
+}
+
+// \brief Collect registers that are modified in the function body (their
+// contents is changed only in the prologue and epilogue).
+static void collectChangingRegs(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ std::set<unsigned> &Regs) {
+ for (const auto &MBB : *MF) {
+ auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
+ bool IsInEpilogue = false;
+ for (const auto &MI : MBB) {
+ IsInEpilogue |= &MI == FirstEpilogueInst;
+ if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
+ collectClobberedRegisters(MI, TRI, Regs);
+ }
+ }
}
void calculateDbgValueHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &Result) {
- RegDescribedVarsMap RegVars;
+ std::set<unsigned> ChangingRegs;
+ collectChangingRegs(MF, TRI, ChangingRegs);
+ RegDescribedVarsMap RegVars;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
if (!MI.isDebugValue()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
- clobberRegisterUses(RegVars, MI, TRI, Result);
+ std::set<unsigned> MIClobberedRegs;
+ collectClobberedRegisters(MI, TRI, MIClobberedRegs);
+ for (unsigned RegNo : MIClobberedRegs) {
+ if (ChangingRegs.count(RegNo))
+ clobberRegisterUses(RegVars, RegNo, Result, MI);
+ }
continue;
}
@@ -167,8 +196,10 @@ void calculateDbgValueHistory(const MachineFunction *MF,
// Make sure locations for register-described variables are valid only
// until the end of the basic block (unless it's the last basic block, in
// which case let their liveness run off to the end of the function).
- if (!MBB.empty() && &MBB != &MF->back())
- clobberAllRegistersUses(RegVars, Result, MBB.back());
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ for (unsigned RegNo : ChangingRegs)
+ clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+ }
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 30312ac..74215aa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -40,9 +40,8 @@
using namespace llvm;
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
- moveTypeModule(AsmPrinter::CFI_M_None) {}
+ : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -59,26 +58,16 @@ void DwarfCFIException::endModule() {
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
return;
// Emit references to all used personality functions
- bool AtLeastOne = false;
const std::vector<const Function*> &Personalities = MMI->getPersonalities();
for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
if (!Personalities[i])
continue;
MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
- AtLeastOne = true;
- }
-
- if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
- // This is a temporary hack to keep sections in the same order they
- // were before. This lets us produce bit identical outputs while
- // transitioning to CFI.
- Asm->OutStreamer.SwitchSection(
- const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
}
}
@@ -123,9 +112,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- Asm->OutStreamer.EmitDebugLabel
- (Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+ MCSymbol *EHBegin =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ if (Asm->MAI->useAssignmentForEHBegin()) {
+ MCContext &Ctx = Asm->OutContext;
+ MCSymbol *CurPos = Ctx.CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(CurPos);
+ Asm->OutStreamer.EmitAssignment(EHBegin,
+ MCSymbolRefExpr::Create(CurPos, Ctx));
+ } else {
+ Asm->OutStreamer.EmitLabel(EHBegin);
+ }
// Provide LSDA information.
if (!shouldEmitLSDA)
@@ -153,5 +150,5 @@ void DwarfCFIException::endFunction(const MachineFunction *) {
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- EmitExceptionTable();
+ emitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2a0615d..77860c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
-static cl::opt<unsigned>
-DwarfVersionNumber("dwarf-version", cl::Hidden,
- cl::desc("Generate DWARF for dwarf version."), cl::init(0));
-
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
@@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
HasDwarfPubSections = DwarfPubSections == Enable;
+ unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule();
@@ -531,8 +530,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
// shouldn't be found by lookup.
AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
DIDescriptor());
- SPCU.applySubprogramAttributes(SP, *AbsDef);
- SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+ SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
createAndAddScopeChildren(SPCU, Scope, *AbsDef);
@@ -732,6 +730,8 @@ void DwarfDebug::beginModule() {
const Module *M = MMI->getModule();
+ FunctionDIs = makeSubprogramMap(*M);
+
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -784,6 +784,26 @@ void DwarfDebug::beginModule() {
SectionMap[Asm->getObjFileLowering().getTextSection()];
}
+void DwarfDebug::finishVariableDefinitions() {
+ for (const auto &Var : ConcreteVariables) {
+ DIE *VariableDie = Var->getDIE();
+ // FIXME: There shouldn't be any variables without DIEs.
+ if (!VariableDie)
+ continue;
+ // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+ // in the ConcreteVariables list, rather than looking it up again here.
+ // DIE::getUnit isn't simple - it walks parent pointers, etc.
+ DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+ assert(Unit);
+ DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+ if (AbsVar && AbsVar->getDIE()) {
+ Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+ *AbsVar->getDIE());
+ } else
+ Unit->applyVariableAttributes(*Var, *VariableDie);
+ }
+}
+
void DwarfDebug::finishSubprogramDefinitions() {
const Module *M = MMI->getModule();
@@ -811,8 +831,7 @@ void DwarfDebug::finishSubprogramDefinitions() {
// inlined versions during codegen.
D = SPCU->getOrCreateSubprogramDIE(SP);
// And attach the attributes
- SPCU->applySubprogramAttributes(SP, *D);
- SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+ SPCU->applySubprogramAttributesToDefinition(SP, *D);
}
}
}
@@ -850,8 +869,10 @@ void DwarfDebug::collectDeadVariables() {
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
DIVariable DV(Variables.getElement(vi));
assert(DV.isVariable());
- DbgVariable NewVar(DV, nullptr, this);
- SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
+ DbgVariable NewVar(DV, this);
+ auto VariableDie = SPCU->constructVariableDIE(NewVar);
+ SPCU->applyVariableAttributes(NewVar, *VariableDie);
+ SPDIE->addChild(std::move(VariableDie));
}
}
}
@@ -861,6 +882,8 @@ void DwarfDebug::collectDeadVariables() {
void DwarfDebug::finalizeModuleInfo() {
finishSubprogramDefinitions();
+ finishVariableDefinitions();
+
// Collect info for variables that were optimized out.
collectDeadVariables();
@@ -1017,9 +1040,9 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
+ emitDebugLocDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
- emitDebugLocDWO();
} else
// Emit info into a debug loc section.
emitDebugLoc();
@@ -1047,27 +1070,51 @@ void DwarfDebug::endModule() {
}
// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
- DebugLoc ScopeLoc) {
- return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
-}
-
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
- const MDNode *ScopeNode) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+ DIVariable &Cleansed) {
LLVMContext &Ctx = DV->getContext();
// More then one inlined variable corresponds to one abstract variable.
- DIVariable Var = cleanseInlinedVariable(DV, Ctx);
- auto I = AbstractVariables.find(Var);
+ // FIXME: This duplication of variables when inlining should probably be
+ // removed. It's done to allow each DIVariable to describe its location
+ // because the DebugLoc on the dbg.value/declare isn't accurate. We should
+ // make it accurate then remove this duplication/cleansing stuff.
+ Cleansed = cleanseInlinedVariable(DV, Ctx);
+ auto I = AbstractVariables.find(Cleansed);
if (I != AbstractVariables.end())
return I->second.get();
+ return nullptr;
+}
- LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
- if (!Scope)
- return nullptr;
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+ DIVariable Cleansed;
+ return getExistingAbstractVariable(DV, Cleansed);
+}
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+ LexicalScope *Scope) {
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
addScopeVariable(Scope, AbsDbgVariable.get());
- return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
+ AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+ const MDNode *ScopeNode) {
+ DIVariable Cleansed = DV;
+ if (getExistingAbstractVariable(DV, Cleansed))
+ return;
+
+ createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+}
+
+void
+DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
+ const MDNode *ScopeNode) {
+ DIVariable Cleansed = DV;
+ if (getExistingAbstractVariable(DV, Cleansed))
+ return;
+
+ if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+ createAbstractVariable(Cleansed, Scope);
}
// If Var is a current function argument then add it to CurrentFnArguments list.
@@ -1106,11 +1153,11 @@ void DwarfDebug::collectVariableInfoFromMMITable(
if (!Scope)
continue;
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
- DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ DbgVariable *RegVar = ConcreteVariables.back().get();
RegVar->setFrameIndex(VI.Slot);
- if (!addCurrentFnArgument(RegVar, Scope))
- addScopeVariable(Scope, RegVar);
+ addScopeVariable(Scope, RegVar);
}
}
@@ -1175,18 +1222,14 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
Processed.insert(DV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
- DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
- if (!addCurrentFnArgument(RegVar, Scope))
- addScopeVariable(Scope, RegVar);
- if (AbsVar)
- AbsVar->setMInsn(MInsn);
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
+ DbgVariable *RegVar = ConcreteVariables.back().get();
+ addScopeVariable(Scope, RegVar);
// Check if the first DBG_VALUE is valid for the rest of the function.
- if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
- RegVar->setMInsn(MInsn);
+ if (Ranges.size() == 1 && Ranges.front().second == nullptr)
continue;
- }
// Handle multiple DBG_VALUE instructions describing one variable.
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
@@ -1205,6 +1248,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
!Begin->getOperand(0).getReg())
continue;
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+ if (End != nullptr)
+ DEBUG(dbgs() << "\t" << *End);
+ else
+ DEBUG(dbgs() << "\tNULL\n");
const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
@@ -1218,8 +1266,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
- << "\t" << *Begin << "\t" << *End << "\n");
DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
DebugLoc.push_back(std::move(Loc));
@@ -1233,11 +1279,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
assert(DV.isVariable());
if (!Processed.insert(DV))
continue;
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
- addScopeVariable(
- Scope,
- new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
- this));
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ addScopeVariable(Scope, ConcreteVariables.back().get());
+ }
}
}
@@ -1371,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return;
+ auto DI = FunctionDIs.find(MF->getFunction());
+ if (DI == FunctionDIs.end())
+ return;
+
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
LScopes.initialize(*MF);
@@ -1386,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ // FnScope->getScopeNode() and DI->second should represent the same function,
+ // though they may not be the same MDNode due to inline functions merged in
+ // LTO where the debug info metadata still differs (either due to distinct
+ // written differences - two versions of a linkonce_odr function
+ // written/copied into two separate files, or some sub-optimal metadata that
+ // isn't structurally identical (see: file path/name info from clang, which
+ // includes the directory of the cpp file being built, even when the file name
+ // is absolute (such as an <> lookup header)))
DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
if (Asm->OutStreamer.hasRawTextSupport())
@@ -1440,6 +1498,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ if (addCurrentFnArgument(Var, LS))
+ return;
SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
DIVariable DV = Var->getVariable();
// Variables with positive arg numbers are parameters.
@@ -1481,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
assert(CurFn == MF);
assert(CurFn != nullptr);
- if (!MMI->hasDebugInfo() || LScopes.empty()) {
+ if (!MMI->hasDebugInfo() || LScopes.empty() ||
+ !FunctionDIs.count(MF->getFunction())) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
@@ -1517,7 +1578,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
assert(DV && DV.isVariable());
if (!ProcessedVars.insert(DV))
continue;
- findAbstractVariable(DV, DV.getContext());
+ ensureAbstractVariableIsCreated(DV, DV.getContext());
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
@@ -1536,12 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
- for (const auto &I : ScopeVariables)
- for (const auto *Var : I.second)
- if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable())
- delete Var;
ScopeVariables.clear();
- DeleteContainerPointers(CurrentFnArguments);
+ CurrentFnArguments.clear();
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2f5abc8..ffe4843 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -27,6 +27,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MachineLocation.h"
@@ -71,16 +72,21 @@ class DbgVariable {
DIVariable Var; // Variable Descriptor.
DIE *TheDIE; // Variable DIE.
unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
- DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
int FrameIndex;
DwarfDebug *DD;
public:
- // AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
- : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV),
- MInsn(nullptr), FrameIndex(~0), DD(DD) {}
+ /// Construct a DbgVariable from a DIVariable.
+ DbgVariable(DIVariable V, DwarfDebug *DD)
+ : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
+ FrameIndex(~0), DD(DD) {}
+
+ /// Construct a DbgVariable from a DEBUG_VALUE.
+ /// AbstractVar may be NULL.
+ DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
+ : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
+ DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
@@ -89,9 +95,7 @@ public:
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
StringRef getName() const { return Var.getName(); }
- DbgVariable *getAbstractVariable() const { return AbsVar; }
const MachineInstr *getMInsn() const { return MInsn; }
- void setMInsn(const MachineInstr *M) { MInsn = M; }
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
// Translate tag to proper Dwarf tag.
@@ -200,6 +204,7 @@ class DwarfDebug : public AsmPrinterHandler {
// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
// can refer to them in spite of insertions into this list.
@@ -325,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfAccelTable AccelNamespace;
DwarfAccelTable AccelTypes;
+ DenseMap<const Function *, DISubprogram> FunctionDIs;
+
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -334,8 +341,14 @@ class DwarfDebug : public AsmPrinterHandler {
}
/// \brief Find abstract variable associated with Var.
- DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
- DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope);
+ DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
+ DIVariable &Cleansed);
+ DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
+ void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
+ void ensureAbstractVariableIsCreated(const DIVariable &Var,
+ const MDNode *Scope);
+ void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+ const MDNode *Scope);
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
@@ -389,6 +402,8 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Collect info for variables that were optimized out.
void collectDeadVariables();
+ void finishVariableDefinitions();
+
void finishSubprogramDefinitions();
/// \brief Finish off debug information after all functions have been
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f792482..0440fce 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,138 +14,14 @@
#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
-#include "AsmPrinterHandler.h"
-#include "llvm/ADT/DenseMap.h"
+#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include <vector>
namespace llvm {
-
-template <typename T> class SmallVectorImpl;
-struct LandingPadInfo;
-class MachineModuleInfo;
-class MachineInstr;
class MachineFunction;
-class MCAsmInfo;
-class MCExpr;
-class MCSymbol;
-class Function;
class ARMTargetStreamer;
-class AsmPrinter;
-
-//===----------------------------------------------------------------------===//
-/// DwarfException - Emits Dwarf exception handling directives.
-///
-class DwarfException : public AsmPrinterHandler {
-protected:
- /// Asm - Target of Dwarf emission.
- AsmPrinter *Asm;
-
- /// MMI - Collected machine module information.
- MachineModuleInfo *MMI;
-
- /// SharedTypeIds - How many leading type ids two landing pads have in common.
- static unsigned SharedTypeIds(const LandingPadInfo *L,
- const LandingPadInfo *R);
-
- /// PadRange - Structure holding a try-range and the associated landing pad.
- struct PadRange {
- // The index of the landing pad.
- unsigned PadIndex;
- // The index of the begin and end labels in the landing pad's label lists.
- unsigned RangeIndex;
- };
-
- typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
-
- /// ActionEntry - Structure describing an entry in the actions table.
- struct ActionEntry {
- int ValueForTypeID; // The value to write - may not be equal to the type id.
- int NextAction;
- unsigned Previous;
- };
-
- /// CallSiteEntry - Structure describing an entry in the call-site table.
- struct CallSiteEntry {
- // The 'try-range' is BeginLabel .. EndLabel.
- MCSymbol *BeginLabel; // zero indicates the start of the function.
- MCSymbol *EndLabel; // zero indicates the end of the function.
-
- // The landing pad starts at PadLabel.
- MCSymbol *PadLabel; // zero indicates that there is no landing pad.
- unsigned Action;
- };
-
- /// ComputeActionsTable - Compute the actions table and gather the first
- /// action index for each landing pad site.
- unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions);
-
- /// CallToNoUnwindFunction - Return `true' if this is a call to a function
- /// marked `nounwind'. Return `false' otherwise.
- bool CallToNoUnwindFunction(const MachineInstr *MI);
-
- /// ComputeCallSiteTable - Compute the call-site table. The entry for an
- /// invoke has a try-range containing the call, a non-zero landing pad and an
- /// appropriate action. The entry for an ordinary call has a try-range
- /// containing the call and zero for the landing pad and the action. Calls
- /// marked 'nounwind' have no entry and must not be contained in the try-range
- /// of any entry - they form gaps in the table. Entries must be ordered by
- /// try-range address.
- void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const RangeMapType &PadMap,
- const SmallVectorImpl<const LandingPadInfo *> &LPs,
- const SmallVectorImpl<unsigned> &FirstActions);
-
- /// EmitExceptionTable - Emit landing pads and actions.
- ///
- /// The general organization of the table is complex, but the basic concepts
- /// are easy. First there is a header which describes the location and
- /// organization of the three components that follow.
- /// 1. The landing pad site information describes the range of code covered
- /// by the try. In our case it's an accumulation of the ranges covered
- /// by the invokes in the try. There is also a reference to the landing
- /// pad that handles the exception once processed. Finally an index into
- /// the actions table.
- /// 2. The action table, in our case, is composed of pairs of type ids
- /// and next action offset. Starting with the action index from the
- /// landing pad site, each type Id is checked for a match to the current
- /// exception. If it matches then the exception and type id are passed
- /// on to the landing pad. Otherwise the next action is looked up. This
- /// chain is terminated with a next action of zero. If no type id is
- /// found the frame is unwound and handling continues.
- /// 3. Type id table contains references to all the C++ typeinfo for all
- /// catches in the function. This tables is reversed indexed base 1.
- void EmitExceptionTable();
-
- virtual void EmitTypeInfos(unsigned TTypeEncoding);
-
-public:
- //===--------------------------------------------------------------------===//
- // Main entry points.
- //
- DwarfException(AsmPrinter *A);
- virtual ~DwarfException();
-
- /// endModule - Emit all exception information that should come after the
- /// content.
- void endModule() override;
-
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// endFunction - Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-
- // We don't need these.
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- void beginInstruction(const MachineInstr *MI) override {}
- void endInstruction() override {}
-};
-class DwarfCFIException : public DwarfException {
+class DwarfCFIException : public EHStreamer {
/// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
/// should be emitted.
bool shouldEmitPersonality;
@@ -179,8 +55,8 @@ public:
void endFunction(const MachineFunction *) override;
};
-class ARMException : public DwarfException {
- void EmitTypeInfos(unsigned TTypeEncoding) override;
+class ARMException : public EHStreamer {
+ void emitTypeInfos(unsigned TTypeEncoding) override;
ARMTargetStreamer &getTargetStreamer();
/// shouldEmitCFI - Per-function flag to indicate if frame CFI info
@@ -206,7 +82,7 @@ public:
void endFunction(const MachineFunction *) override;
};
-class Win64Exception : public DwarfException {
+class Win64Exception : public EHStreamer {
/// shouldEmitPersonality - Per-function flag to indicate if personality
/// info should be emitted.
bool shouldEmitPersonality;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index a70c0f7..9538bee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1071,6 +1071,8 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
I != E; ++I) {
DIScope Ctx = *I;
StringRef Name = Ctx.getName();
+ if (Name.empty() && Ctx.isNameSpace())
+ Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
CS += "::";
@@ -1359,12 +1361,13 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
- if (!NS.getName().empty()) {
+ StringRef Name = NS.getName();
+ if (!Name.empty())
addString(NDie, dwarf::DW_AT_name, NS.getName());
- DD->addAccelNamespace(NS.getName(), NDie);
- addGlobalName(NS.getName(), NDie, NS.getContext());
- } else
- DD->addAccelNamespace("(anonymous namespace)", NDie);
+ else
+ Name = "(anonymous namespace)";
+ DD->addAccelNamespace(Name, NDie);
+ addGlobalName(Name, NDie, NS.getContext());
addSourceLine(NDie, NS);
return &NDie;
}
@@ -1382,14 +1385,14 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
- // Build the decl now to ensure it preceeds the definition.
+ // Build the decl now to ensure it precedes the definition.
getOrCreateSubprogramDIE(SPDecl);
}
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
- // Abort here and fill this in later, depending on whether or not this
+ // Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances or not.
if (SP.isDefinition())
return &SPDie;
@@ -1398,12 +1401,21 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
return &SPDie;
}
+void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+ applySubprogramAttributes(SP, SPDie);
+ addGlobalName(SP.getName(), SPDie, Context);
+}
+
void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
DeclDie = getDIE(SPDecl);
- assert(DeclDie);
+ assert(DeclDie && "This DIE should've already been constructed when the "
+ "definition DIE was created in "
+ "getOrCreateSubprogramDIE");
DeclLinkageName = SPDecl.getLinkageName();
}
@@ -1502,6 +1514,17 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addFlag(SPDie, dwarf::DW_AT_explicit);
}
+void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, Var.getVariable());
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
// Return const expression if value is a GEP to access merged global
// constant. e.g.
// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1665,10 +1688,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
DD->addAccelName(GV.getLinkageName(), AddrDIE);
}
- if (!GV.isLocalToUnit())
- addGlobalName(GV.getName(),
- VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
- GV.getContext());
+ addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
+ GV.getContext());
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
@@ -1777,24 +1798,13 @@ std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
- StringRef Name = DV.getName();
-
// Define variable debug information entry.
auto VariableDie = make_unique<DIE>(DV.getTag());
- DbgVariable *AbsVar = DV.getAbstractVariable();
- if (AbsVar && AbsVar->getDIE())
- addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
- else {
- if (!Name.empty())
- addString(*VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(*VariableDie, DV.getVariable());
- addType(*VariableDie, DV.getType());
- if (DV.isArtificial())
- addFlag(*VariableDie, dwarf::DW_AT_artificial);
- }
- if (Abstract)
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
return VariableDie;
+ }
// Add variable address.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index acb7528..b7b83b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -400,6 +400,8 @@ public:
DIE *getOrCreateSubprogramDIE(DISubprogram SP);
void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
+ void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 3a12c73..73f62bf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,45 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing DWARF exception info into asm files.
+// This file contains support for writing exception info into assembly files.
//
//===----------------------------------------------------------------------===//
-#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
+#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+
using namespace llvm;
-DwarfException::DwarfException(AsmPrinter *A)
- : Asm(A), MMI(Asm->MMI) {}
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
-DwarfException::~DwarfException() {}
+EHStreamer::~EHStreamer() {}
-/// SharedTypeIds - How many leading type ids two landing pads have in common.
-unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
- const LandingPadInfo *R) {
+/// How many leading type ids two landing pads have in common.
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
unsigned LSize = LIds.size(), RSize = RIds.size();
unsigned MinSize = LSize < RSize ? LSize : RSize;
@@ -58,10 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
return Count;
}
-/// ComputeActionsTable - Compute the actions table and gather the first action
-/// index for each landing pad site.
-unsigned DwarfException::
-ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+unsigned EHStreamer::
+computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions) {
@@ -109,7 +95,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
const LandingPadInfo *LPI = *I;
const std::vector<int> &TypeIds = LPI->TypeIds;
- unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
unsigned SizeSiteActions = 0;
if (NumShared < TypeIds.size()) {
@@ -167,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
return SizeActions;
}
-/// CallToNoUnwindFunction - Return `true' if this is a call to a function
-/// marked `nounwind'. Return `false' otherwise.
-bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
@@ -201,15 +187,14 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
return MarkedNoUnwind;
}
-/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
-/// has a try-range containing the call, a non-zero landing pad, and an
-/// appropriate action. The entry for an ordinary call has a try-range
-/// containing the call and zero for the landing pad and the action. Calls
-/// marked 'nounwind' have no entry and must not be contained in the try-range
-/// of any entry - they form gaps in the table. Entries must be ordered by
-/// try-range address.
-void DwarfException::
-ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
const RangeMapType &PadMap,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
@@ -228,7 +213,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (const auto &MI : MBB) {
if (!MI.isEHLabel()) {
if (MI.isCall())
- SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
continue;
}
@@ -308,7 +293,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
}
-/// EmitExceptionTable - Emit landing pads and actions.
+/// Emit landing pads and actions.
///
/// The general organization of the table is complex, but the basic concepts are
/// easy. First there is a header which describes the location and organization
@@ -328,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// unwound and handling continues.
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This tables is reverse indexed base 1.
-void DwarfException::EmitExceptionTable() {
+void EHStreamer::emitExceptionTable() {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -350,7 +335,8 @@ void DwarfException::EmitExceptionTable() {
// landing pad site.
SmallVector<ActionEntry, 32> Actions;
SmallVector<unsigned, 64> FirstActions;
- unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+ unsigned SizeActions =
+ computeActionsTable(LandingPads, Actions, FirstActions);
// Invokes and nounwind calls have entries in PadMap (due to being bracketed
// by try-range labels when lowered). Ordinary calls do not, so appropriate
@@ -368,7 +354,7 @@ void DwarfException::EmitExceptionTable() {
// Compute the call-site table.
SmallVector<CallSiteEntry, 64> CallSites;
- ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+ computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
// Final tallies.
@@ -657,12 +643,12 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
- EmitTypeInfos(TTypeEncoding);
+ emitTypeInfos(TTypeEncoding);
Asm->EmitAlignment(2);
}
-void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
@@ -703,19 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
}
}
-/// endModule - Emit all exception information that should come after the
-/// content.
-void DwarfException::endModule() {
+/// Emit all exception information that should come after the content.
+void EHStreamer::endModule() {
llvm_unreachable("Should be implemented");
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
-void DwarfException::beginFunction(const MachineFunction *MF) {
+/// Gather pre-function exception information. Assumes it's being emitted
+/// immediately after the function entry point.
+void EHStreamer::beginFunction(const MachineFunction *MF) {
llvm_unreachable("Should be implemented");
}
-/// endFunction - Gather and emit post-function exception information.
-void DwarfException::endFunction(const MachineFunction *) {
+/// Gather and emit post-function exception information.
+void EHStreamer::endFunction(const MachineFunction *) {
llvm_unreachable("Should be implemented");
}
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
new file mode 100644
index 0000000..2b6ba78
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -0,0 +1,138 @@
+//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+
+#include "AsmPrinterHandler.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineInstr;
+class MachineFunction;
+class AsmPrinter;
+
+template <typename T>
+class SmallVectorImpl;
+
+/// Emits exception handling directives.
+class EHStreamer : public AsmPrinterHandler {
+protected:
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// How many leading type ids two landing pads have in common.
+ static unsigned sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// Compute the actions table and gather the first action index for each
+ /// landing pad site.
+ unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ bool callToNoUnwindFunction(const MachineInstr *MI);
+
+ /// Compute the call-site table. The entry for an invoke has a try-range
+ /// containing the call, a non-zero landing pad and an appropriate action.
+ /// The entry for an ordinary call has a try-range containing the call and
+ /// zero for the landing pad and the action. Calls marked 'nounwind' have
+ /// no entry and must not be contained in the try-range of any entry - they
+ /// form gaps in the table. Entries must be ordered by try-range address.
+
+ void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This tables is reversed indexed base 1.
+ void emitExceptionTable();
+
+ virtual void emitTypeInfos(unsigned TTypeEncoding);
+
+public:
+ EHStreamer(AsmPrinter *A);
+ virtual ~EHStreamer();
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+};
+}
+
+#endif
+
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 17d8bff..81285d5 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -38,9 +38,8 @@
using namespace llvm;
Win64Exception::Win64Exception(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
- {}
+ : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false) {}
Win64Exception::~Win64Exception() {}
@@ -73,14 +72,14 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+ Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym);
if (!shouldEmitPersonality)
return;
- MCSymbol *GCCHandlerSym =
- Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
- Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+ Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
Asm->getFunctionNumber()));
@@ -99,17 +98,10 @@ void Win64Exception::endFunction(const MachineFunction *) {
MMI->TidyLandingPads();
if (shouldEmitPersonality) {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
- const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
Asm->OutStreamer.PushSection();
- Asm->OutStreamer.EmitWin64EHHandlerData();
- Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
- 4);
- EmitExceptionTable();
+ Asm->OutStreamer.EmitWinEHHandlerData();
+ emitExceptionTable();
Asm->OutStreamer.PopSection();
}
- Asm->OutStreamer.EmitWin64EHEndProc();
+ Asm->OutStreamer.EmitWinCFIEndProc();
}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 2212941..6a5c431 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
return;
const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV) == true);
+ assert(FnDebugInfo.count(GV));
assert(CurFn == &FnDebugInfo[GV]);
if (CurFn->Instrs.empty()) {
diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
index d995333..421946d 100644
--- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
@@ -21,17 +21,19 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "arm-atomic-expand"
namespace {
class AtomicExpandLoadLinked : public FunctionPass {
- const TargetLowering *TLI;
+ const TargetMachine *TM;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
+ : FunctionPass(ID), TM(TM) {
initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
}
@@ -50,29 +52,16 @@ namespace {
char AtomicExpandLoadLinked::ID = 0;
char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
-
-static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
- PassInfo *PI = new PassInfo(
- "Expand Atomic calls in terms of load-linked & store-conditional",
- "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
- PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
- false, PassInfo::TargetMachineCtor_t(
- callTargetMachineCtor<AtomicExpandLoadLinked>));
- Registry.registerPass(*PI, true);
- return PI;
-}
-
-void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
-}
-
+INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
+ "Expand Atomic calls in terms of load-linked & store-conditional",
+ false, false)
FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
return new AtomicExpandLoadLinked(TM);
}
bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
- if (!TLI)
+ if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
return false;
SmallVector<Instruction *, 1> AtomicInsts;
@@ -89,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
bool MadeChange = false;
for (Instruction *Inst : AtomicInsts) {
- if (!TLI->shouldExpandAtomicInIR(Inst))
+ if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst))
continue;
if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
@@ -111,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
// Load instructions don't actually need a leading fence, even in the
// SequentiallyConsistent case.
AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+ TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic
+ : LI->getOrdering();
// The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
// an ldrexd (A3.5.3).
IRBuilder<> Builder(LI);
- Value *Val =
- TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+ Value *Val = TM->getTargetLowering()->emitLoadLinked(
+ Builder, LI->getPointerOperand(), MemOpOrder);
insertTrailingFence(Builder, LI->getOrdering());
@@ -178,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded =
+ TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *NewVal;
switch (AI->getOperation()) {
@@ -195,7 +186,7 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Nand:
- NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+ NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
"new");
break;
case AtomicRMWInst::Or:
@@ -224,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
llvm_unreachable("Unknown atomic op");
}
- Value *StoreSuccess =
- TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
+ Builder, NewVal, Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
@@ -256,19 +247,26 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.end/%cmpxchg.barrier
+ // label %cmpxchg.failure
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %cmpxchg.end
- // cmpxchg.barrier:
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
// This grabs the DebugLoc from CI
@@ -284,37 +282,82 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded =
+ TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
// If the the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TLI->emitStoreConditional(
+ Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
Builder, CI->getNewValOperand(), Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
- Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : LoopBB);
- // Finally, make sure later instructions don't get reordered with a fence if
- // necessary.
- Builder.SetInsertPoint(BarrierBB);
+ // Make sure later instructions don't get reordered with a fence if necessary.
+ Builder.SetInsertPoint(SuccessBB);
insertTrailingFence(Builder, SuccessOrder);
Builder.CreateBr(ExitBB);
- CI->replaceAllUsesWith(Loaded);
- CI->eraseFromParent();
+ Builder.SetInsertPoint(FailureBB);
+ insertTrailingFence(Builder, FailureOrder);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+ // Setup the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now we're no longer iterating through them.
+ for (auto EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
return true;
}
AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
+ if (!TM->getTargetLowering()->getInsertFencesForAtomic())
return Ord;
if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
@@ -327,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
+ if (!TM->getTargetLowering()->getInsertFencesForAtomic())
return;
if (Ord == Acquire || Ord == AcquireRelease)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 7f31b1a..b2737bf 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+ /// Estimate the cost overhead of SK_Alternate shuffle.
+ unsigned getAltShuffleOverhead(Type *Ty) const;
+
const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
public:
@@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return OpCost;
}
+unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
+ assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned Cost = 0;
+ // Shuffle cost is equal to the cost of extracting element from its argument
+ // plus the cost of inserting them onto the result vector.
+
+ // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index
+ // 0 of first vector, index 1 of second vector,index 2 of first vector and
+ // finally index 3 of second vector and insert them at index <0,1,2,3> of
+ // result vector.
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+ return Cost;
+}
+
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
+ if (Kind == SK_Alternate) {
+ return getAltShuffleOverhead(Tp);
+ }
return 1;
}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index f623a48..7503e57 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1505,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (MO.isUse()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Uses.insert(*AI);
- } else if (!MO.isDead())
- // Don't try to hoist code in the rare case the terminator defines a
- // register that is later used.
- return MBB->end();
+ } else {
+ if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+
+ // If the terminator defines a register, make sure we don't hoist
+ // the instruction whose def might be clobbered by the terminator.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
+ }
}
if (Uses.empty())
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0b492a9..57c24e8 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,11 +22,13 @@ add_llvm_library(LLVMCodeGen
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
+ GlobalMerge.cpp
IfConversion.cpp
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
JITCodeEmitter.cpp
+ JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 6aa60c6..ccac40c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -151,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
}
char CodeGenPrepare::ID = 0;
-static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
- initializeTargetLibraryInfoPass(Registry);
- PassInfo *PI = new PassInfo(
- "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
- PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
- PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
- Registry.registerPass(*PI, true);
- return PI;
-}
-
-void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
-}
+INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
return new CodeGenPrepare(TM);
@@ -1078,8 +1067,11 @@ void ExtAddrMode::print(raw_ostream &OS) const {
NeedPlus = true;
}
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+ if (BaseOffs) {
+ OS << (NeedPlus ? " + " : "")
+ << BaseOffs;
+ NeedPlus = true;
+ }
if (BaseReg) {
OS << (NeedPlus ? " + " : "")
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 822636f..d3ffcc7 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -146,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// It's not safe to change register allocation for source operands of
- // that have special allocation requirements. Also assume all registers
- // used in a call must not be changed (ABI).
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
// conservative here because the kill markers cannot be trusted after
// if-conversion:
@@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+ // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+ // it or any of its sub or super regs. We need to use KeepRegs to mark the
+ // reg because not all uses of the same reg within an instruction are
+ // necessarily tagged as tied.
+ // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+ // def register but not the second (see PR20020 for details).
+ // FIXME: can this check be relaxed to account for undef uses
+ // of a register? In the above 'xor' example, the uses of %eax are undef, so
+ // earlier instructions could still replace %eax even though the 'xor'
+ // itself can't be changed.
+ if (MI->isRegTiedToUseOperand(i) &&
+ Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs) {
+ KeepRegs.set(*SubRegs);
+ }
+ for (MCSuperRegIterator SuperRegs(Reg, TRI);
+ SuperRegs.isValid(); ++SuperRegs) {
+ KeepRegs.set(*SuperRegs);
+ }
+ }
+
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
+
+ // If we've already marked this reg as unchangeable, carry on.
+ if (KeepRegs.test(Reg)) continue;
+
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
+ // FIXME: we should use a SubRegIterator that includes self (as above), so
+ // we don't have to repeat all this code for the reg itself.
DefIndices[Reg] = Count;
KillIndices[Reg] = ~0u;
assert(((KillIndices[Reg] == ~0u) !=
@@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.insert(std::make_pair(Reg, &MO));
+ // FIXME: we should use an MCRegAliasIterator that includes self so we don't
+ // have to repeat all this code for the reg itself.
+
// It wasn't previously live but now it is, this is a kill.
if (KillIndices[Reg] == ~0u) {
KillIndices[Reg] = Count;
@@ -309,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// the two-address instruction also defines NewReg, as may happen with
// pre/postincrement loads. In this case, both the use and def operands are in
// RegRefs because the def is inserted by PrescanInstruction and not erased
-// during ScanInstruction. So checking for an instructions with definitions of
+// during ScanInstruction. So checking for an instruction with definitions of
// both NewReg and AntiDepReg covers it.
bool
CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
@@ -325,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
if (RefOper->isDef() && RefOper->isEarlyClobber())
return true;
- // Handle cases in which this instructions defines NewReg.
+ // Handle cases in which this instruction defines NewReg.
MachineInstr *MI = RefOper->getParent();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &CheckOper = MI->getOperand(i);
@@ -343,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
return true;
// Don't allow an instruction using AntiDepReg to be earlyclobbered by
- // NewReg
+ // NewReg.
if (CheckOper.isEarlyClobber())
return true;
- // Don't allow inline asm to define NewReg at all. Who know what it's
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
// doing with it.
if (MI->isInlineAsm())
return true;
@@ -494,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// as we go to help determine which registers are available.
unsigned Broken = 0;
unsigned Count = InsertPosIndex - 1;
- for (MachineBasicBlock::iterator I = End, E = Begin;
- I != E; --Count) {
+ for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
MachineInstr *MI = --I;
if (MI->isDebugValue())
continue;
@@ -526,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
- // Don't break anti-dependencies if an use down below requires
+ // Don't break anti-dependencies if a use down below requires
// this exact register.
AntiDepReg = 0;
else {
@@ -564,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
- TII->isPredicated(MI))
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 1949a48..45e4ff5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -55,12 +55,12 @@ class TargetRegisterInfo;
typedef std::multimap<unsigned, MachineOperand *>::const_iterator
RegRefIter;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceding bottom
- /// up), or ~0u if the register is live.
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
/// KeepRegs - A set of registers which are live and cannot be changed to
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 0000000..027ee38
--- /dev/null
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,361 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// note that we saved 2 registers here almostly "for free".
+// ===---------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
+// FIXME: this could be a transitional option, and we probably need to remove
+// it if only we are sure this optimization could always benefit all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+ cl::desc("Enable global merge pass on external linkage"),
+ cl::init(false));
+
+STATISTIC(NumMerged , "Number of globals merged");
+namespace {
+ class GlobalMerge : public FunctionPass {
+ const TargetMachine *TM;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const;
+
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variables marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that must not be merged away
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit GlobalMerge(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+
+ const char *getPassName() const override {
+ return "Merge internal globals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const {
+ const TargetLowering *TLI = TM->getTargetLowering();
+ const DataLayout *DL = TLI->getDataLayout();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(),
+ [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
+ });
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ assert(Globals.size() > 1);
+
+ // FIXME: This simple solution merges globals all together as maximum as
+ // possible. However, with this solution it would be hard to remove dead
+ // global symbols at link-time. An alternative solution could be checking
+ // global symbols references function by function, and make the symbols
+ // being referred in the same function merged and we would probably need
+ // to introduce heuristic algorithm to solve the merge conflict from
+ // different functions.
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+
+ bool HasExternal = false;
+ GlobalVariable *TheFirstExternal = 0;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += DL->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ TheFirstExternal = Globals[j];
+ }
+ }
+
+ // If merged variables doesn't have external linkage, we needn't to expose
+ // the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+ // If merged variables have external linkage, we use symbol name of the
+ // first variable merged as the suffix of global symbol name. This would
+ // be able to avoid the link-time naming conflict for globalm symbols.
+ GlobalVariable *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, Linkage, MergedInit,
+ HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName()
+ : "_MergedGlobals",
+ nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+
+ for (size_t k = i; k < j; ++k) {
+ GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+ std::string Name = Globals[k]->getName();
+
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+
+ if (Linkage != GlobalValue::InternalLinkage) {
+ // Generate a new alias...
+ auto *PTy = cast<PointerType>(GEP->getType());
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ Linkage, Name, GEP, &M);
+ }
+
+ NumMerged++;
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
+
+bool GlobalMerge::doInitialization(Module &M) {
+ if (!EnableGlobalMerge)
+ return false;
+
+ DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+ BSSGlobals;
+ const TargetLowering *TLI = TM->getTargetLowering();
+ const DataLayout *DL = TLI->getDataLayout();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+ setMustKeepGlobalVariables(M);
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal or external globals only
+ if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
+ !I->hasInternalLinkage())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(I->getType());
+ assert(PT && "Global variable is not a pointer!");
+
+ unsigned AddressSpace = PT->getAddressSpace();
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = DL->getPreferredAlignment(I);
+ Type *Ty = I->getType()->getElementType();
+ if (Alignment > DL->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
+ if (DL->getTypeAllocSize(Ty) < MaxOffset) {
+ if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
+ BSSGlobals[AddressSpace].push_back(I);
+ else if (I->isConstant())
+ ConstGlobals[AddressSpace].push_back(I);
+ else
+ Globals[AddressSpace].push_back(I);
+ }
+ }
+
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = Globals.begin(), E = Globals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
+
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
+
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
+ return new GlobalMerge(TM);
+}
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
new file mode 100644
index 0000000..61ef722
--- /dev/null
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -0,0 +1,301 @@
+//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief An implementation of jump-instruction tables.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jt"
+
+#include "llvm/CodeGen/JumpInstrTables.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+using namespace llvm;
+
+char JumpInstrTables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
+ "Jump-Instruction Tables", true, true)
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
+INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
+ "Jump-Instruction Tables", true, true)
+
+STATISTIC(NumJumpTables, "Number of indirect call tables generated");
+STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
+
+ModulePass *llvm::createJumpInstrTablesPass() {
+ // The default implementation uses a single table for all functions.
+ return new JumpInstrTables(JumpTable::Single);
+}
+
+ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
+ return new JumpInstrTables(JTT);
+}
+
+namespace {
+static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
+static const char jump_section_prefix[] = ".jump.instr.table.text.";
+
+// Checks to see if a given CallSite is making an indirect call, including
+// cases where the indirect call is made through a bitcast.
+bool isIndirectCall(CallSite &CS) {
+ if (CS.getCalledFunction())
+ return false;
+
+ // Check the value to see if it is merely a bitcast of a function. In
+ // this case, it will translate to a direct function call in the resulting
+ // assembly, so we won't treat it as an indirect call here.
+ const Value *V = CS.getCalledValue();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
+ }
+
+ // Otherwise, since we know it's a call, it must be an indirect call
+ return true;
+}
+
+// Replaces Functions and GlobalAliases with a different Value.
+bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
+ User *Us = U->getUser();
+ if (!Us)
+ return false;
+ if (Instruction *I = dyn_cast<Instruction>(Us)) {
+ CallSite CS(I);
+
+ // Don't do the replacement if this use is a direct call to this function.
+ // If the use is not the called value, then replace it.
+ if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
+ return false;
+ }
+
+ U->set(V);
+ } else if (Constant *C = dyn_cast<Constant>(Us)) {
+ // Don't replace calls to bitcasts of function symbols, since they get
+ // translated to direct calls.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // This bitcast must have exactly one user.
+ if (CE->user_begin() != CE->user_end()) {
+ User *ParentUs = *CE->user_begin();
+ if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
+ CallSite CS(CI);
+ Use &CEU = *CE->use_begin();
+ if (CS.isCallee(&CEU)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier
+ // requires alias to point to a defined function. So, GlobalAlias is handled
+ // as a separate case in runOnModule.
+ if (!isa<GlobalAlias>(C))
+ C->replaceUsesOfWithOnConstant(GV, V, U);
+ } else {
+ assert(false && "The Use of a Function symbol is neither an instruction nor"
+ " a constant");
+ }
+
+ return true;
+}
+
+// Replaces all replaceable address-taken uses of GV with a pointer to a
+// jump-instruction table entry.
+void replaceValueWithFunction(GlobalValue *GV, Function *F) {
+ // Go through all uses of this function and replace the uses of GV with the
+ // jump-table version of the function. Get the uses as a vector before
+ // replacing them, since replacing them changes the use list and invalidates
+ // the iterator otherwise.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
+ Use &U = *I++;
+
+ // Replacement of constants replaces all instances in the constant. So, some
+ // uses might have already been handled by the time we reach them here.
+ if (U.get() == GV)
+ replaceGlobalValueIndirectUse(GV, F, &U);
+ }
+
+ return;
+}
+} // end anonymous namespace
+
+JumpInstrTables::JumpInstrTables()
+ : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
+ JTType(JumpTable::Single) {
+ initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
+ : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
+ initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::~JumpInstrTables() {}
+
+void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<JumpInstrTableInfo>();
+}
+
+Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
+ FunctionType *OrigFunTy = Target->getFunctionType();
+ FunctionType *FunTy = transformType(OrigFunTy);
+
+ JumpMap::iterator it = Metadata.find(FunTy);
+ if (Metadata.end() == it) {
+ struct TableMeta Meta;
+ Meta.TableNum = TableCount;
+ Meta.Count = 0;
+ Metadata[FunTy] = Meta;
+ it = Metadata.find(FunTy);
+ ++NumJumpTables;
+ ++TableCount;
+ }
+
+ it->second.Count++;
+
+ std::string NewName(jump_func_prefix);
+ NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
+ Function *JumpFun =
+ Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
+ // The section for this table
+ JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
+ JITI->insertEntry(FunTy, Target, JumpFun);
+
+ ++NumFuncsInJumpTables;
+ return JumpFun;
+}
+
+bool JumpInstrTables::hasTable(FunctionType *FunTy) {
+ FunctionType *TransTy = transformType(FunTy);
+ return Metadata.end() != Metadata.find(TransTy);
+}
+
+FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+ // Returning nullptr forces all types into the same table, since all types map
+ // to the same type
+ Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
+
+ // Ignore the return type.
+ Type *RetTy = VoidPtrTy;
+ bool IsVarArg = FunTy->isVarArg();
+ std::vector<Type *> ParamTys(FunTy->getNumParams());
+ FunctionType::param_iterator PI, PE;
+ int i = 0;
+
+ std::vector<Type *> EmptyParams;
+ Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
+ FunctionType *VoidFnTy = FunctionType::get(
+ Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
+ switch (JTType) {
+ case JumpTable::Single:
+
+ return FunctionType::get(RetTy, EmptyParams, false);
+ case JumpTable::Arity:
+ // Transform all types to void* so that all functions with the same arity
+ // end up in the same table.
+ for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+ PI++, i++) {
+ ParamTys[i] = VoidPtrTy;
+ }
+
+ return FunctionType::get(RetTy, ParamTys, IsVarArg);
+ case JumpTable::Simplified:
+ // Project all parameters types to one of 3 types: composite, integer, and
+ // function, matching the three subclasses of Type.
+ for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+ ++PI, ++i) {
+ assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
+ isa<CompositeType>(*PI)) &&
+ "This type is not an Integer or a Composite or a Function");
+ if (isa<CompositeType>(*PI)) {
+ ParamTys[i] = VoidPtrTy;
+ } else if (isa<FunctionType>(*PI)) {
+ ParamTys[i] = VoidFnTy;
+ } else if (isa<IntegerType>(*PI)) {
+ ParamTys[i] = Int32Ty;
+ }
+ }
+
+ return FunctionType::get(RetTy, ParamTys, IsVarArg);
+ case JumpTable::Full:
+ // Don't transform this type at all.
+ return FunTy;
+ }
+
+ return nullptr;
+}
+
+bool JumpInstrTables::runOnModule(Module &M) {
+ // Make sure the module is well-formed, especially with respect to jumptable.
+ if (verifyModule(M))
+ return false;
+
+ JITI = &getAnalysis<JumpInstrTableInfo>();
+
+ // Get the set of jumptable-annotated functions.
+ DenseMap<Function *, Function *> Functions;
+ for (Function &F : M) {
+ if (F.hasFnAttribute(Attribute::JumpTable)) {
+ assert(F.hasUnnamedAddr() &&
+ "Attribute 'jumptable' requires 'unnamed_addr'");
+ Functions[&F] = nullptr;
+ }
+ }
+
+ // Create the jump-table functions.
+ for (auto &KV : Functions) {
+ Function *F = KV.first;
+ KV.second = insertEntry(M, F);
+ }
+
+ // GlobalAlias is a special case, because the target of an alias statement
+ // must be a defined function. So, instead of replacing a given function in
+ // the alias, we replace all uses of aliases that target jumptable functions.
+ // Note that there's no need to create these functions, since only aliases
+ // that target known jumptable functions are replaced, and there's no way to
+ // put the jumptable annotation on a global alias.
+ DenseMap<GlobalAlias *, Function *> Aliases;
+ for (GlobalAlias &GA : M.aliases()) {
+ Constant *Aliasee = GA.getAliasee();
+ if (Function *F = dyn_cast<Function>(Aliasee)) {
+ auto it = Functions.find(F);
+ if (it != Functions.end()) {
+ Aliases[&GA] = it->second;
+ }
+ }
+ }
+
+ // Replace each address taken function with its jump-instruction table entry.
+ for (auto &KV : Functions)
+ replaceValueWithFunction(KV.first, KV.second);
+
+ for (auto &KV : Aliases)
+ replaceValueWithFunction(KV.first, KV.second);
+
+ return !Functions.empty();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a5ac057..df96b94 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,11 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -82,6 +86,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
+
// Add internal analysis passes from the target machine.
TM->addAnalysisPasses(PM);
@@ -136,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
+ // Passes to handle jumptable function annotations. These can't be handled at
+ // JIT time, so we don't add them directly to addPassesToGenerateCode.
+ PM.add(createJumpInstrTableInfoPass());
+ PM.add(createJumpInstrTablesPass(Options.JTType));
+
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -199,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_Null:
// The Null output is intended for use for performance analysis and testing,
// not real users.
- AsmStreamer.reset(createNullStreamer(*Context));
+ AsmStreamer.reset(getTarget().createNullStreamer(*Context));
break;
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 388f58f..7d5646b 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -329,12 +329,13 @@ class LDVImpl {
void computeIntervals();
public:
- LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
- ModifiedMF(false) {}
+ LDVImpl(LiveDebugVariables *ps)
+ : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
bool runOnMachineFunction(MachineFunction &mf);
/// clear - Release all memory.
void clear() {
+ MF = nullptr;
userValues.clear();
virtRegToEqClass.clear();
userVarMap.clear();
@@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() {
}
bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ clear();
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
MDT = &pass.getAnalysis<MachineDominatorTree>();
TRI = mf.getTarget().getRegisterInfo();
- clear();
LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
+ if (!FunctionDIs.count(mf.getFunction()))
+ return false;
if (!pImpl)
pImpl = new LDVImpl(this);
return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ if (!MF)
+ return;
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
@@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
}
+bool LiveDebugVariables::doInitialization(Module &M) {
+ FunctionDIs = makeSubprogramMap(M);
+ return Pass::doInitialization(M);
+}
#ifndef NDEBUG
void LiveDebugVariables::dump() {
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index bb67435..7ec0d17 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -22,6 +22,7 @@
#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
@@ -32,6 +33,7 @@ class VirtRegMap;
class LiveDebugVariables : public MachineFunctionPass {
void *pImpl;
+ DenseMap<const Function*, DISubprogram> FunctionDIs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -64,6 +66,7 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
+ bool doInitialization(Module &) override;
};
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3563f8e..1559560 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -186,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
LRCalc->createDeadDefs(LI);
LRCalc->extendToUses(LI);
+ computeDeadValues(&LI, LI, nullptr, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -412,21 +413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Handle dead values.
bool CanSeparate = false;
+ computeDeadValues(li, NewLR, &CanSeparate, dead);
+
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+void LiveIntervals::computeDeadValues(LiveInterval *li,
+ LiveRange &LR,
+ bool *CanSeparate,
+ SmallVectorImpl<MachineInstr*> *dead) {
for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def);
- assert(LRI != NewLR.end() && "Missing segment for PHI");
+ LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def);
+ assert(LRI != LR.end() && "Missing segment for PHI");
if (LRI->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
VNI->markUnused();
- NewLR.removeSegment(LRI->start, LRI->end);
+ LR.removeSegment(LRI->start, LRI->end);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
- CanSeparate = true;
+ if (CanSeparate)
+ *CanSeparate = true;
} else {
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(VNI->def);
@@ -438,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
}
}
}
-
- // Move the trimmed segments back.
- li->segments.swap(NewLR.segments);
- DEBUG(dbgs() << "Shrunk: " << *li << '\n');
- return CanSeparate;
}
void LiveIntervals::extendToIndices(LiveRange &LR,
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 0ec5c33..08fef5f 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -332,7 +332,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
}
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
OS << "BB#" << getNumber();
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index eb3d71f..6138aef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
/// normal 'L' label is returned.
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
const DataLayout *DL = getTarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
@@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
///
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(
+ !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+ getFrameLowering()->getStackAlignment());
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
ensureMaxAlignment(Alignment);
@@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(
+ !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+ getFrameLowering()->getStackAlignment());
Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
@@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned.
unsigned StackAlign = getFrameLowering()->getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
- Align =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+ !RealignOption,
+ Align, getFrameLowering()->getStackAlignment());
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
/*Alloca*/ nullptr));
return -++NumFixedObjects;
}
+/// CreateFixedSpillStackObject - Create a spill slot at a fixed location
+/// on the stack. Returns an index with a negative value.
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset) {
+ unsigned StackAlign = getFrameLowering()->getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+ !RealignOption,
+ Align, getFrameLowering()->getStackAlignment());
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
+ /*Immutable*/ true,
+ /*isSS*/ true,
+ /*Alloca*/ nullptr));
+ return -++NumFixedObjects;
+}
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
@@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
return false;
-
+
// For now, only support constants with the same size.
uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
- if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
- StoreSize > 128)
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128)
return false;
Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
@@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
/// an existing one. User must specify the log2 of the minimum required
/// alignment for the object.
///
-unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 23847d6..44191f7 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -333,6 +333,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (skipOptnoneFunction(*mf.getFunction()))
return false;
+ const TargetSubtargetInfo &ST =
+ mf.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostMachineScheduler()) {
+ DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+ return false;
+ }
DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
@@ -472,14 +478,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void ReadyQueue::dump() {
dbgs() << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
}
-#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -529,6 +534,11 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
llvm_unreachable(nullptr);
}
#endif
+ // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
+ SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
+
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
SchedImpl->releaseTopNode(SuccSU);
@@ -563,6 +573,11 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
llvm_unreachable(nullptr);
}
#endif
+ // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
+ PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
+
--PredSU->NumSuccsLeft;
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
SchedImpl->releaseBottomNode(PredSU);
@@ -674,10 +689,13 @@ void ScheduleDAGMI::schedule() {
CurrentBottom = MI;
}
}
- updateQueues(SU, IsTopNode);
-
- // Notify the scheduling strategy after updating the DAG.
+ // Notify the scheduling strategy before updating the DAG.
+ // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
+ // runs, it can then use the accurate ReadyCycle time to determine whether
+ // newly released nodes can move to the readyQ.
SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
@@ -1568,7 +1586,7 @@ void SchedBoundary::reset() {
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
// reserved (SchedBoundary::ReservedCycles).
- MaxObservedLatency = 0;
+ MaxObservedStall = 0;
#endif
// Reserve a zero-count for invalid CritResIdx.
ExecutedResCounts.resize(1);
@@ -1668,8 +1686,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+ unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+ if (NRCycle > CurrCycle) {
+#ifndef NDEBUG
+ MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+#endif
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
+ << SchedModel->getResourceName(PI->ProcResourceIdx)
+ << "=" << NRCycle << "c\n");
return true;
+ }
}
}
return false;
@@ -1725,6 +1751,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
}
void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+#ifndef NDEBUG
+ // ReadyCycle was been bumped up to the CurrCycle when this node was
+ // scheduled, but CurrCycle may have been eagerly advanced immediately after
+ // scheduling, so may now be greater than ReadyCycle.
+ if (ReadyCycle > CurrCycle)
+ MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
+
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -1744,18 +1780,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isWeak())
- continue;
- unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
- unsigned Latency = I->getLatency();
-#ifndef NDEBUG
- MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
- if (SU->TopReadyCycle < PredReadyCycle + Latency)
- SU->TopReadyCycle = PredReadyCycle + Latency;
- }
releaseNode(SU, SU->TopReadyCycle);
}
@@ -1763,20 +1787,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) {
if (SU->isScheduled)
return;
- assert(SU->getInstr() && "Scheduled SUnit must have instr");
-
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isWeak())
- continue;
- unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
- unsigned Latency = I->getLatency();
-#ifndef NDEBUG
- MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
- if (SU->BotReadyCycle < SuccReadyCycle + Latency)
- SU->BotReadyCycle = SuccReadyCycle + Latency;
- }
releaseNode(SU, SU->BotReadyCycle);
}
@@ -1943,10 +1953,12 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
- ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
-#ifndef NDEBUG
- MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
-#endif
+ if (isTop()) {
+ ReservedCycles[PIdx] =
+ std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+ }
+ else
+ ReservedCycles[PIdx] = NextCycle;
}
}
}
@@ -2049,8 +2061,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
}
}
for (unsigned i = 0; Available.empty(); ++i) {
- assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
- "permanent hazard"); (void)i;
+// FIXME: Re-enable assert once PR20057 is resolved.
+// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+// "permanent hazard");
+ (void)i;
bumpCycle(CurrCycle + 1);
releasePending();
}
@@ -2090,111 +2104,6 @@ void SchedBoundary::dumpScheduledState() {
// GenericScheduler - Generic implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//
-namespace {
-/// Base class for GenericScheduler. This class maintains information about
-/// scheduling candidates based on TargetSchedModel making it easy to implement
-/// heuristics for either preRA or postRA scheduling.
-class GenericSchedulerBase : public MachineSchedStrategy {
-public:
- /// Represent the type of SchedCandidate found within a single queue.
- /// pickNodeBidirectional depends on these listed by decreasing priority.
- enum CandReason {
- NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
- ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
- TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
-
-#ifndef NDEBUG
- static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
-#endif
-
- /// Policy for scheduling the next instruction in the candidate's zone.
- struct CandPolicy {
- bool ReduceLatency;
- unsigned ReduceResIdx;
- unsigned DemandResIdx;
-
- CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
- };
-
- /// Status of an instruction's critical resource consumption.
- struct SchedResourceDelta {
- // Count critical resources in the scheduled region required by SU.
- unsigned CritResources;
-
- // Count critical resources from another region consumed by SU.
- unsigned DemandedResources;
-
- SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
-
- bool operator==(const SchedResourceDelta &RHS) const {
- return CritResources == RHS.CritResources
- && DemandedResources == RHS.DemandedResources;
- }
- bool operator!=(const SchedResourceDelta &RHS) const {
- return !operator==(RHS);
- }
- };
-
- /// Store the state used by GenericScheduler heuristics, required for the
- /// lifetime of one invocation of pickNode().
- struct SchedCandidate {
- CandPolicy Policy;
-
- // The best SUnit candidate.
- SUnit *SU;
-
- // The reason for this candidate.
- CandReason Reason;
-
- // Set of reasons that apply to multiple candidates.
- uint32_t RepeatReasonSet;
-
- // Register pressure values for the best candidate.
- RegPressureDelta RPDelta;
-
- // Critical resource consumption of the best candidate.
- SchedResourceDelta ResDelta;
-
- SchedCandidate(const CandPolicy &policy)
- : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
-
- bool isValid() const { return SU; }
-
- // Copy the status of another candidate without changing policy.
- void setBest(SchedCandidate &Best) {
- assert(Best.Reason != NoCand && "uninitialized Sched candidate");
- SU = Best.SU;
- Reason = Best.Reason;
- RPDelta = Best.RPDelta;
- ResDelta = Best.ResDelta;
- }
-
- bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
- void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
-
- void initResourceDelta(const ScheduleDAGMI *DAG,
- const TargetSchedModel *SchedModel);
- };
-
-protected:
- const MachineSchedContext *Context;
- const TargetSchedModel *SchedModel;
- const TargetRegisterInfo *TRI;
-
- SchedRemainder Rem;
-protected:
- GenericSchedulerBase(const MachineSchedContext *C):
- Context(C), SchedModel(nullptr), TRI(nullptr) {}
-
- void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
- SchedBoundary *OtherZone);
-
-#ifndef NDEBUG
- void traceCandidate(const SchedCandidate &Cand);
-#endif
-};
-} // namespace
-
void GenericSchedulerBase::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel) {
@@ -2430,65 +2339,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
<< GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
}
-namespace {
-/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class GenericScheduler : public GenericSchedulerBase {
- ScheduleDAGMILive *DAG;
-
- // State of the top and bottom scheduled instruction boundaries.
- SchedBoundary Top;
- SchedBoundary Bot;
-
- MachineSchedPolicy RegionPolicy;
-public:
- GenericScheduler(const MachineSchedContext *C):
- GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
- Bot(SchedBoundary::BotQID, "BotQ") {}
-
- void initPolicy(MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned NumRegionInstrs) override;
-
- bool shouldTrackPressure() const override {
- return RegionPolicy.ShouldTrackPressure;
- }
-
- void initialize(ScheduleDAGMI *dag) override;
-
- SUnit *pickNode(bool &IsTopNode) override;
-
- void schedNode(SUnit *SU, bool IsTopNode) override;
-
- void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
- }
-
- void releaseBottomNode(SUnit *SU) override {
- Bot.releaseBottomNode(SU);
- }
-
- void registerRoots() override;
-
-protected:
- void checkAcyclicLatency();
-
- void tryCandidate(SchedCandidate &Cand,
- SchedCandidate &TryCand,
- SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- RegPressureTracker &TempTracker);
-
- SUnit *pickNodeBidirectional(bool &IsTopNode);
-
- void pickNodeFromQueue(SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate);
-
- void reschedulePhysRegCopies(SUnit *SU, bool isTop);
-};
-} // namespace
-
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() &&
"(PreRA)GenericScheduler needs vreg liveness");
@@ -3023,75 +2873,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.",
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//
-namespace {
-/// PostGenericScheduler - Interface to the scheduling algorithm used by
-/// ScheduleDAGMI.
-///
-/// Callbacks from ScheduleDAGMI:
-/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
-class PostGenericScheduler : public GenericSchedulerBase {
- ScheduleDAGMI *DAG;
- SchedBoundary Top;
- SmallVector<SUnit*, 8> BotRoots;
-public:
- PostGenericScheduler(const MachineSchedContext *C):
- GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
-
- virtual ~PostGenericScheduler() {}
-
- void initPolicy(MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned NumRegionInstrs) override {
- /* no configurable policy */
- };
-
- /// PostRA scheduling does not track pressure.
- bool shouldTrackPressure() const override { return false; }
-
- void initialize(ScheduleDAGMI *Dag) override {
- DAG = Dag;
- SchedModel = DAG->getSchedModel();
- TRI = DAG->TRI;
-
- Rem.init(DAG, SchedModel);
- Top.init(DAG, SchedModel, &Rem);
- BotRoots.clear();
-
- // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
- // or are disabled, then these HazardRecs will be disabled.
- const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
- if (!Top.HazardRec) {
- Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
- }
- }
-
- void registerRoots() override;
-
- SUnit *pickNode(bool &IsTopNode) override;
-
- void scheduleTree(unsigned SubtreeID) override {
- llvm_unreachable("PostRA scheduler does not support subtree analysis.");
- }
-
- void schedNode(SUnit *SU, bool IsTopNode) override;
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+ DAG = Dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
- void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
- }
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ BotRoots.clear();
- // Only called for roots.
- void releaseBottomNode(SUnit *SU) override {
- BotRoots.push_back(SU);
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+ // or are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ const TargetMachine &TM = DAG->MF.getTarget();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
}
+}
-protected:
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
-
- void pickNodeFromQueue(SchedCandidate &Cand);
-};
-} // namespace
void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index b3f7198..249b2d0 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -30,11 +30,6 @@
using namespace llvm;
-namespace llvm {
-extern cl::opt<bool> EnableStackMapLiveness;
-extern cl::opt<bool> EnablePatchPointLiveness;
-}
-
static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -92,9 +87,9 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
-// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't
-// be part of the standard pass pipeline, and the target would just add a PostRA
-// scheduling pass wherever it wants.
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it
+// wouldn't be part of the standard pass pipeline, and the target would just add
+// a PostRA scheduling pass wherever it wants.
static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
@@ -421,7 +416,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
+ case ExceptionHandling::WinEH:
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
@@ -566,8 +561,7 @@ void TargetPassConfig::addMachinePasses() {
if (addPreEmitPass())
printAndVerify("After PreEmit passes");
- if (EnableStackMapLiveness || EnablePatchPointLiveness)
- addPass(&StackMapLivenessID);
+ addPass(&StackMapLivenessID);
}
/// Add passes that optimize machine instructions in SSA form.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index eeee93a..716cb1f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -91,6 +91,10 @@ static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
+static cl::opt<bool>
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
+ cl::desc("Disable advanced copy optimization"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -137,6 +141,105 @@ namespace {
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
};
+
+ /// \brief Helper class to track the possible sources of a value defined by
+ /// a (chain of) copy related instructions.
+ /// Given a definition (instruction and definition index), this class
+ /// follows the use-def chain to find successive suitable sources.
+ /// The given source can be used to rewrite the definition into
+ /// def = COPY src.
+ ///
+ /// For instance, let us consider the following snippet:
+ /// v0 =
+ /// v2 = INSERT_SUBREG v1, v0, sub0
+ /// def = COPY v2.sub0
+ ///
+ /// Using a ValueTracker for def = COPY v2.sub0 will give the following
+ /// suitable sources:
+ /// v2.sub0 and v0.
+ /// Then, def can be rewritten into def = COPY v0.
+ class ValueTracker {
+ private:
+ /// The current point into the use-def chain.
+ const MachineInstr *Def;
+ /// The index of the definition in Def.
+ unsigned DefIdx;
+ /// The sub register index of the definition.
+ unsigned DefSubReg;
+ /// The register where the value can be found.
+ unsigned Reg;
+ /// Specifiy whether or not the value tracking looks through
+ /// complex instructions. When this is false, the value tracker
+ /// bails on everything that is not a copy or a bitcast.
+ ///
+ /// Note: This could have been implemented as a specialized version of
+ /// the ValueTracker class but that would have complicated the code of
+ /// the users of this class.
+ bool UseAdvancedTracking;
+ /// Optional MachineRegisterInfo used to perform some complex
+ /// tracking.
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Dispatcher to the right underlying implementation of
+ /// getNextSource.
+ bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for Copy instructions.
+ bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for Bitcast instructions.
+ bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for RegSequence
+ /// instructions.
+ bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for InsertSubreg
+ /// instructions.
+ bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for ExtractSubreg
+ /// instructions.
+ bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for SubregToReg
+ /// instructions.
+ bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+ public:
+ /// \brief Create a ValueTracker instance for the value defines by \p MI
+ /// at the operand index \p DefIdx.
+ /// \p DefSubReg represents the sub register index the value tracker will
+ /// track. It does not need to match the sub register index used in \p MI.
+ /// \p UseAdvancedTracking specifies whether or not the value tracker looks
+ /// through complex instructions. By default (false), it handles only copy
+ /// and bitcast instructions.
+ /// \p MRI useful to perform some complex checks.
+ ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+ bool UseAdvancedTracking = false,
+ const MachineRegisterInfo *MRI = nullptr)
+ : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
+ assert(Def->getOperand(DefIdx).isDef() &&
+ Def->getOperand(DefIdx).isReg() &&
+ "Definition does not match machine instruction");
+ // Initially the value is in the defined register.
+ Reg = Def->getOperand(DefIdx).getReg();
+ }
+
+ /// \brief Following the use-def chain, get the next available source
+ /// for the tracked value.
+ /// When the returned value is not nullptr, getReg() gives the register
+ /// that contain the tracked value.
+ /// \note The sub register index returned in \p SrcSubReg must be used
+ /// on that getReg() to access the actual value.
+ /// \return Unless the returned value is nullptr (i.e., no source found),
+ /// \p SrcIdx gives the index of the next source in the returned
+ /// instruction and \p SrcSubReg the index to be used on that source to
+ /// get the tracked value. When nullptr is returned, no alternative source
+ /// has been found.
+ const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+ /// \brief Get the last register where the initial value can be found.
+ /// Initially this is the register of the definition.
+ /// Then, after each successful call to getNextSource, this is the
+ /// register of the last source.
+ unsigned getReg() const { return Reg; }
+ };
}
char PeepholeOptimizer::ID = 0;
@@ -443,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
unsigned Src;
unsigned SrcSubReg;
bool ShouldRewrite = false;
- MachineInstr *Copy = MI;
const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
- // Follow the chain of copies until we reach the top or find a
- // more suitable source.
+ // Follow the chain of copies until we reach the top of the use-def chain
+ // or find a more suitable source.
+ ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
do {
- unsigned CopyDefIdx, CopySrcIdx;
- if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+ unsigned CopySrcIdx, CopySrcSubReg;
+ if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
break;
- const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
- assert(MO.isReg() && "Copies must be between registers.");
- Src = MO.getReg();
-
+ Src = ValTracker.getReg();
+ SrcSubReg = CopySrcSubReg;
+
+ // Do not extend the live-ranges of physical registers as they add
+ // constraints to the register allocator.
+ // Moreover, if we want to extend the live-range of a physical register,
+ // unlike SSA virtual register, we will have to check that they are not
+ // redefine before the related use.
if (TargetRegisterInfo::isPhysicalRegister(Src))
break;
const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
- SrcSubReg = MO.getSubReg();
// If this source does not incur a cross register bank copy, use it.
ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
SrcSubReg);
- // Follow the chain of copies: get the definition of Src.
- Copy = MRI->getVRegDef(Src);
- } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+ } while (!ShouldRewrite);
// If we did not find a more suitable source, there is nothing to optimize.
if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
@@ -483,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
MRI->replaceRegWith(Def, NewVR);
MRI->clearKillFlags(NewVR);
+ // We extended the lifetime of Src.
+ // Clear the kill flags to account for that.
+ MRI->clearKillFlags(Src);
MI->eraseFromParent();
++NumCopiesBitcasts;
return true;
@@ -673,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isCopy() && "Invalid definition");
+ // Copy instruction are supposed to be: Def = Src.
+ // If someone breaks this assumption, bad things will happen everywhere.
+ assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of src.
+ // Bails as we do not support composing subreg yet.
+ return false;
+ // Otherwise, we want the whole source.
+ SrcIdx = 1;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isBitcast() && "Invalid definition");
+
+ // Bail if there are effects that a plain copy will not expose.
+ if (Def->hasUnmodeledSideEffects())
+ return false;
+
+ // Bitcasts with more than one def are not supported.
+ if (Def->getDesc().getNumDefs() != 1)
+ return false;
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of the src.
+ // Bails as we do not support composing subreg yet.
+ return false;
+
+ SrcIdx = Def->getDesc().getNumOperands();
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = Def->getOperand(OpIdx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ assert(!MO.isDef() && "We should have skipped all the definitions by now");
+ if (SrcIdx != EndOpIdx)
+ // Multiple sources?
+ return false;
+ SrcIdx = OpIdx;
+ }
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isRegSequence() && "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subreg, bails out.
+ // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+ // This should almost never happen as the SSA property is tracked at
+ // the register level (as opposed to the subreg level).
+ // I.e.,
+ // Def.sub0 =
+ // Def.sub1 =
+ // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+ // Def. Thus, it must not be generated.
+ // However, some code could theoretically generates a single
+ // Def.sub0 (i.e, not defining the other subregs) and we would
+ // have this case.
+ // If we can ascertain (or force) that this never happens, we could
+ // turn that into an assertion.
+ return false;
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ // Check if one of the operand defines the subreg we are interested in.
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
+ OpIdx != EndOpIdx; OpIdx += 2) {
+ const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ if (MOSubIdx.getImm() == DefSubReg) {
+ assert(Def->getOperand(OpIdx).isReg() &&
+ "One of the source of the reg_sequence is not a register");
+ SrcIdx = OpIdx;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+ }
+ }
+
+ // If the subreg we are tracking is super-defined by another subreg,
+ // we could follow this value. However, this would require to compose
+ // the subreg and we do not do that for now.
+ return false;
+}
+
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isInsertSubreg() && "Invalid definition");
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subreg, bails out.
+ // Same remark as getNextSourceFromRegSequence.
+ // I.e., this may be turned into an assert.
+ return false;
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub1
+ // There are two cases:
+ // 1. DefSubReg == sub1, get v1.
+ // 2. DefSubReg != sub1, the value may be available through v0.
+
+ // #1 Check if the inserted register matches the require sub index.
+ unsigned InsertedSubReg = Def->getOperand(3).getImm();
+ if (InsertedSubReg == DefSubReg) {
+ SrcIdx = 2;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+ }
+ // #2 Otherwise, if the sub register we are looking for is not partial
+ // defined by the inserted element, we can look through the main
+ // register (v0).
+ // To check the overlapping we need a MRI and a TRI.
+ if (!MRI)
+ return false;
+
+ const MachineOperand &MODef = Def->getOperand(DefIdx);
+ const MachineOperand &MOBase = Def->getOperand(1);
+ // If the result register (Def) and the base register (v0) do not
+ // have the same register class or if we have to compose
+ // subregisters, bails out.
+ if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
+ MOBase.getSubReg())
+ return false;
+
+ // Get the TRI and check if inserted sub register overlaps with the
+ // sub register we are tracking.
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ if (!TRI ||
+ (TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+ return false;
+ // At this point, the value is available in v0 via the same subreg
+ // we used for Def.
+ SrcIdx = 1;
+ SrcSubReg = DefSubReg;
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isExtractSubreg() && "Invalid definition");
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0, sub0
+
+ // Bails if we have to compose sub registers.
+ // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+ if (DefSubReg)
+ return false;
+
+ // Bails if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+ if (Def->getOperand(1).getSubReg())
+ return false;
+ // Otherwise, the value is available in the v0.sub0.
+ SrcIdx = 1;
+ SrcSubReg = Def->getOperand(2).getImm();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isSubregToReg() && "Invalid definition");
+ // We are looking at:
+ // Def = SUBREG_TO_REG Imm, v0, sub0
+
+ // Bails if we have to compose sub registers.
+ // If DefSubReg != sub0, we would have to check that all the bits
+ // we track are included in sub0 and if yes, we would have to
+ // determine the right subreg in v0.
+ if (DefSubReg != Def->getOperand(3).getImm())
+ return false;
+ // Bails if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+ if (Def->getOperand(2).getSubReg())
+ return false;
+
+ SrcIdx = 2;
+ SrcSubReg = Def->getOperand(3).getImm();
+ return true;
+}
+
+bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+ assert(Def && "This method needs a valid definition");
+
+ assert(
+ (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
+ Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+ if (Def->isCopy())
+ return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+ if (Def->isBitcast())
+ return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+ // All the remaining cases involve "complex" instructions.
+ // Bails if we did not ask for the advanced tracking.
+ if (!UseAdvancedTracking)
+ return false;
+ if (Def->isRegSequence())
+ return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
+ if (Def->isInsertSubreg())
+ return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
+ if (Def->isExtractSubreg())
+ return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+ if (Def->isSubregToReg())
+ return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+ return false;
+}
+
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ // If we reach a point where we cannot move up in the use-def chain,
+ // there is nothing we can get.
+ if (!Def)
+ return nullptr;
+
+ const MachineInstr *PrevDef = nullptr;
+ // Try to find the next source.
+ if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+ // Update definition, definition index, and subregister for the
+ // next call of getNextSource.
+ const MachineOperand &MO = Def->getOperand(SrcIdx);
+ assert(MO.isReg() && !MO.isDef() && "Source is invalid");
+ // Update the current register.
+ Reg = MO.getReg();
+ // Update the return value before moving up in the use-def chain.
+ PrevDef = Def;
+ // If we can still move up in the use-def chain, move to the next
+ // defintion.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Def = MRI->getVRegDef(Reg);
+ DefIdx = MRI->def_begin(Reg).getOperandNo();
+ DefSubReg = SrcSubReg;
+ return PrevDef;
+ }
+ }
+ // If we end up here, this means we will not be able to find another source
+ // for the next iteration.
+ // Make sure any new call to getNextSource bails out early by cutting the
+ // use-def chain.
+ Def = nullptr;
+ return PrevDef;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index c74a42f..b98d210 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -160,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
replaceFrameIndices(Fn);
// If register scavenging is needed, as we've enabled doing it as a
- // post-pass, scavenge the virtual registers that frame index elimiation
+ // post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
@@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
}
}
- if (CSI.empty())
- return; // Early exit if no callee saved registers are modified!
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ // If target doesn't implement this, use generic code.
- unsigned NumFixedSpillSlots;
- const TargetFrameLowering::SpillSlot *FixedSpillSlots =
- TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
- // Now that we know which registers need to be saved and restored, allocate
- // stack slots for them.
- for (std::vector<CalleeSavedInfo>::iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
- int FrameIdx;
- if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
- I->setFrameIdx(FrameIdx);
- continue;
- }
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+ I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
- // Check to see if this physreg must be spilled to a particular stack slot
- // on this target.
- const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
- while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
- FixedSlot->Reg != Reg)
- ++FixedSlot;
-
- if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
- // Nope, just spill it anywhere convenient.
- unsigned Align = RC->getAlignment();
- unsigned StackAlign = TFI->getStackAlignment();
-
- // We may not be able to satisfy the desired alignment specification of
- // the TargetRegisterClass if the stack alignment is smaller. Use the
- // min.
- Align = std::min(Align, StackAlign);
- FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
- if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
- if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
- } else {
- // Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
- }
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
- I->setFrameIdx(FrameIdx);
+ I->setFrameIdx(FrameIdx);
+ }
}
MFI->setCalleeSavedInfo(CSI);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index aa7c178..901b993 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
using namespace llvm;
@@ -79,6 +80,12 @@ ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
cl::desc("Exhaustive Search for registers bypassing the depth "
"and interference cutoffs of last chance recoloring"));
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -285,6 +292,10 @@ class RAGreedy : public MachineFunctionPass,
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
+ /// Run or not the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ bool EnableLocalReassign;
+
public:
RAGreedy();
@@ -731,7 +742,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Evicting another local live range in this case could lead to suboptimal
// coloring.
if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
- !canReassign(*Intf, PhysReg)) {
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
return false;
}
}
@@ -2308,9 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- TRI = MF->getTarget().getRegisterInfo();
- TII = MF->getTarget().getInstrInfo();
+ const TargetMachine &TM = MF->getTarget();
+ TRI = TM.getRegisterInfo();
+ TII = TM.getInstrInfo();
RCI.runOnMachineFunction(mf);
+
+ EnableLocalReassign = EnableLocalReassignment ||
+ TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b2909e0..617e459 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
const TargetRegisterInfo *TRI) {
bool Empty = true;
@@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
dbgs() << "\n";
}
+LLVM_DUMP_METHOD
void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Max Pressure: ";
dumpRegSetPressure(MaxSetPressure, TRI);
@@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << '\n';
}
+LLVM_DUMP_METHOD
void RegPressureTracker::dump() const {
if (!isTopClosed() || !isBottomClosed()) {
dbgs() << "Curr Pressure: ";
@@ -75,7 +77,6 @@ void RegPressureTracker::dump() const {
}
P.dump(TRI);
}
-#endif
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 92a9a30..0f8b21c 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1508,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
@@ -1517,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const {
OS << format("%g", ((double)InstrCount / Length));
}
+LLVM_DUMP_METHOD
void ILPValue::dump() const {
dbgs() << *this << '\n';
}
namespace llvm {
+LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
}
} // namespace llvm
-#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d2fd53..7c42e4d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -167,9 +167,18 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
- SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- ConstantSDNode *CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+ // FIXME: We blindly ignore splats which include undef which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
- SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
- // For an operand generating multiple values, one of the values may
- // become dead allowing further simplification (e.g. split index
- // arithmetic from an indexed load).
- if (Op->hasOneUse() || Op->getNumValues() > 1)
- AddToWorkList(Op);
- }
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
DAG.DeleteNode(TLO.Old.getNode());
}
}
@@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
- SDValue Ops[] = { N1, N0 };
- SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
- Ops);
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode;
+ if (const BinaryWithFlagsSDNode *BinNode =
+ dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ CSENode = DAG.getNodeIfExists(
+ N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ } else {
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+ }
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
- if (N0.getOpcode() == ISD::AND &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
- if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
if (C.getNode())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
@@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
if (VT.isInteger() &&
- (VT0 == MVT::i1 ||
- (VT0.isInteger() &&
- TLI.getBooleanContents(false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
+ (VT0 == MVT::i1 || (VT0.isInteger() &&
+ TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(false, true) &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
- // FIXME:
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
return std::make_pair(Lo, Hi);
}
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ MVT VT = N->getSimpleValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF
+ // After we find an UNDEF element, keep looping until we get to half the
+ // length of the BV and see if all the non-undef nodes are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N2;
+ // The ConvertSelectToConcatVector function is assuming both the above
+ // checks for (vselect (build_vector all{ones,zeros) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SDValue CV = ConvertSelectToConcatVector(N, DAG);
+ if (CV.getNode())
+ return CV;
+ }
+
return SDValue();
}
@@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
+ EVT N0VT = N0.getOperand(0).getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ TLI.getBooleanContents(N0VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
@@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+ TLI.hasBigEndianPartOrdering(VT) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
@@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- assert(AM != ISD::UNINDEXED);
- SDValue BP = LD->getOperand(1);
- SDValue Inc = LD->getOperand(2);
- unsigned Opc =
- (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
- return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- SDValue Index;
- if (N->hasAnyUseOfValue(1)) {
- Index = SplitIndexingFromLoad(LD);
- // Try to fold the base pointer arithmetic into subsequent loads and
- // stores.
- AddUsersToWorkList(N);
- } else
- Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
@@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return SDValue();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt =
+ cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorkList(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ if (TLI.isBigEndian())
+ PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+ Offset = DAG.getConstant(PtrOff, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getNode(
+ ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+ DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+ if (TLI.isBigEndian())
+ Offset = DAG.getNode(
+ ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+ DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ MPI = OriginalLoad->getPointerInfo();
+ }
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+ OriginalLoad->isNonTemporal(), Align,
+ OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+ OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(EVE);
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
@@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+ ISD::isNormalLoad(InVec.getNode())) {
+ SDValue Index = N->getOperand(1);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- bool NewLoad = false;
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
- // If the result of load has to be truncated, then it's not necessarily
- // profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
- return SDValue();
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
- return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
- BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
- ExtVT = BCVT.getVectorElementType();
- NewLoad = true;
- }
LoadSDNode *LN0 = nullptr;
const ShuffleVectorSDNode *SVN = nullptr;
@@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ EltNo = DAG.getConstant(Elt, EltNo.getValueType());
}
}
@@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (Elt == -1)
return DAG.getUNDEF(LVT);
- unsigned Align = LN0->getAlignment();
- if (NewLoad) {
- // Check the resultant load doesn't need a higher alignment than the
- // original load.
- unsigned NewAlign =
- TLI.getDataLayout()
- ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
- return SDValue();
-
- Align = NewAlign;
- }
-
- SDValue NewPtr = LN0->getBasePtr();
- unsigned PtrOff = 0;
-
- if (Elt) {
- PtrOff = LVT.getSizeInBits() * Elt / 8;
- EVT PtrType = NewPtr.getValueType();
- if (TLI.isBigEndian())
- PtrOff = VT.getSizeInBits() / 8 - PtrOff;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
- DAG.getConstant(PtrOff, PtrType));
- }
-
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
- SDValue Load;
- SDValue Chain;
- if (NVT.bitsGT(LVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
- ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
- NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- } else {
- Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- if (NVT.bitsLT(LVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
- else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
- }
- WorkListRemover DeadNodes(*this);
- SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Since we're explcitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(N);
- return SDValue(N, 0);
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
}
return SDValue();
@@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Opnds;
unsigned BuildVecNumElts = N0.getNumOperands();
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
+ EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+ EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+ if (SclTy0.isFloatingPoint()) {
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+ } else {
+ // If BUILD_VECTOR are from built from integer, they may have different
+ // operand types. Get the smaller type and truncate all operands to it.
+ EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N0.getOperand(i)));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N1.getOperand(i)));
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
@@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If this shuffle node is simply a swizzle of another shuffle node,
- // and it reverses the swizzle of the previous shuffle then we can
- // optimize shuffle(shuffle(x, undef), undef) -> x.
+ // then try to simplify it.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // Shuffle nodes can only reverse shuffles with a single non-undef value.
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
- return SDValue();
-
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle. Adopt the incoming index.
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
+ Mask.push_back(Idx);
+ }
+
+ bool CommuteOperands = false;
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+ // To be valid, the combine shuffle mask should only reference elements
+ // from one of the two vectors in input to the inner shufflevector.
+ bool IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // See if the combined mask only reference undefs or elements coming
+ // from the first shufflevector operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+ if (!IsValidMask) {
+ IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // Check that all the elements come from the second shuffle operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+ CommuteOperands = IsValidMask;
+ }
- // The combined shuffle must map each index to itself.
- if (Idx >= 0 && (unsigned)Idx != i)
+ // Early exit if the combined shuffle mask is not valid.
+ if (!IsValidMask)
return SDValue();
}
- return OtherSV->getOperand(0);
+ // See if this pair of shuffles can be safely folded according to either
+ // of the following rules:
+ // shuffle(shuffle(x, y), undef) -> x
+ // shuffle(shuffle(x, undef), undef) -> x
+ // shuffle(shuffle(x, y), undef) -> y
+ bool IsIdentityMask = true;
+ unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+ for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+ // Skip Undefs.
+ if (Mask[i] < 0)
+ continue;
+
+ // The combined shuffle must map each index to itself.
+ IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+ }
+
+ if (IsIdentityMask) {
+ if (CommuteOperands)
+ // optimize shuffle(shuffle(x, y), undef) -> y.
+ return OtherSV->getOperand(1);
+
+ // optimize shuffle(shuffle(x, undef), undef) -> x
+ // optimize shuffle(shuffle(x, y), undef) -> x
+ return OtherSV->getOperand(0);
+ }
+
+ // It may still be beneficial to combine the two shuffles if the
+ // resulting shuffle is legal.
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (!CommuteOperands)
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+ &Mask[0]);
+ }
}
return SDValue();
@@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
+ // Type legalization might introduce new shuffles in the DAG.
+ // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+ // -> (shuffle (VBinOp (A, B)), Undef, Mask).
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+ RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ EVT VT = N->getValueType(0);
+ SDValue UndefVector = LHS.getOperand(1);
+ SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ LHS.getOperand(0), RHS.getOperand(0));
+ AddUsersToWorkList(N);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+ &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 99931c1..445572a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -42,12 +42,15 @@
#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -558,6 +561,107 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return true;
}
+/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands
+/// to a stackmap or patchpoint machine instruction.
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+ // Values coming from a stack location also require a sepcial encoding,
+ // but that is added later on by the target specific frame index
+ // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ unsigned Reg = getRegForValue(Val);
+ if (Reg == 0)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+
+ return true;
+}
+
+bool FastISel::SelectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Push live variables for the stack map (skipping the first two arguments
+ // <id> and <numBytes>).
+ if (!addStackMapLiveVars(Ops, I, 2))
+ return false;
+
+ // We are not adding any register mask info here, because the stackmap doesn't
+ // clobber anything.
+
+ // Add scratch registers as implicit def and early clobber.
+ CallingConv::ID CC = I->getCallingConv();
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+ .addImm(0);
+
+ // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::STACKMAP));
+ for (auto const &MO : Ops)
+ MIB.addOperand(MO);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ .addImm(0).addImm(0);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+
+ return true;
+}
+
bool FastISel::SelectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
@@ -713,6 +817,8 @@ bool FastISel::SelectCall(const User *I) {
UpdateValueMap(Call, ResultReg);
return true;
}
+ case Intrinsic::experimental_stackmap:
+ return SelectStackmap(Call);
}
// Usually, it does not make sense to initialize a value,
@@ -879,7 +985,6 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
-
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// For more accurate line information if this is the only instruction
@@ -890,7 +995,11 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
- FuncInfo.MBB->addSuccessor(MSucc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
+ MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
}
/// SelectFNeg - Emit an FNeg operation.
@@ -1101,6 +1210,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
FastISel::FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
+ MF(funcInfo.MF),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
MCP(*FuncInfo.MF->getConstantPool()),
@@ -1635,3 +1745,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
}
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+ const Value *Ptr;
+ Type *ValTy;
+ unsigned Alignment;
+ unsigned Flags;
+ bool IsVolatile;
+
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ Alignment = LI->getAlignment();
+ IsVolatile = LI->isVolatile();
+ Flags = MachineMemOperand::MOLoad;
+ Ptr = LI->getPointerOperand();
+ ValTy = LI->getType();
+ } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ Alignment = SI->getAlignment();
+ IsVolatile = SI->isVolatile();
+ Flags = MachineMemOperand::MOStore;
+ Ptr = SI->getPointerOperand();
+ ValTy = SI->getValueOperand()->getType();
+ } else {
+ return nullptr;
+ }
+
+ bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
+ bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
+ const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlignment(ValTy);
+
+ unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
+
+ if (IsVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (IsNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (IsInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+ Alignment, TBAAInfo, Ranges);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a59e895..c0e8c8c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2060,7 +2060,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2095,7 +2095,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2129,7 +2129,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2266,7 +2266,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2381,7 +2381,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0);
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2650,12 +2650,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+ // A larger signed type can hold all unsigned values of the requested type,
+ // so using FP_TO_SINT is valid
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
OpToUse = ISD::FP_TO_SINT;
break;
}
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
OpToUse = ISD::FP_TO_UINT;
break;
}
@@ -2996,8 +2999,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()),
- &Args, 0);
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()), std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3007,14 +3010,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
break;
@@ -3051,6 +3054,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp.second);
break;
}
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+ // splits out the success value as a comparison. Expanding the resulting
+ // ATOMIC_CMP_SWAP will produce a libcall.
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getSuccessOrdering(),
+ cast<AtomicSDNode>(Node)->getFailureOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+
+ SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1),
+ Res, Node->getOperand(2), ISD::SETEQ);
+
+ Results.push_back(Res.getValue(0));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
@@ -3074,7 +3098,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0);
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3128,6 +3153,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::FP_TO_SINT: {
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (VT != MVT::f32 || NVT != MVT::i64)
+ break;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits());
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
+ SDValue Bias = DAG.getConstant(127, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+ IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+ SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT)));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT)));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+
+ R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, TLI.getShiftAmountTy(IntVT))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, TLI.getShiftAmountTy(IntVT))),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+ Sign);
+
+ Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
+ DAG.getConstant(0, NVT), Ret, ISD::SETLT));
+ break;
+ }
case ISD::FP_TO_UINT: {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
@@ -3653,7 +3737,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
Results.push_back(Sum);
- EVT OType = Node->getValueType(1);
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(Node->getValueType(0));
SDValue Zero = DAG.getConstant(0, LHS.getValueType());
@@ -3676,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Results.push_back(Cmp);
+ Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
break;
}
case ISD::UADDO:
@@ -3687,9 +3772,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
Results.push_back(Sum);
- Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
- Node->getOpcode () == ISD::UADDO ?
- ISD::SETULT : ISD::SETUGT));
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+ ISD::CondCode CC
+ = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
break;
}
case ISD::UMULO:
@@ -3879,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
int TrueValue;
- switch (TLI.getBooleanContents(VT.isVector())) {
+ switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = 1;
@@ -3899,13 +3989,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1); // RHS
Tmp3 = Node->getOperand(2); // True
Tmp4 = Node->getOperand(3); // False
+ EVT VT = Node->getValueType(0);
SDValue CC = Node->getOperand(4);
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+ // If the condition code is legal, then we need to expand this
+ // node using SETCC and SELECT.
+ EVT CmpVT = Tmp1.getValueType();
+ assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+ "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+ "expanded.");
+ EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT);
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+ Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ break;
+ }
+ // SELECT_CC is legal, so the condition code must not be.
bool Legalized = false;
// Try to legalize by inverting the condition. This is for targets that
// might support an ordered version of a condition, but not the unordered
// version (or vice versa).
- ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
Tmp1.getValueType().isInteger());
if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2483184..6feac0d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -138,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_CMP_SWAP:
- Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -192,16 +194,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1) {
+ assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+ N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+ N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
+ N->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res.getValue(1);
+ }
+
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
- SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, Op3,
- N->getMemOperand(), N->getSuccessOrdering(),
- N->getFailureOrdering(), N->getSynchScope());
+ SDVTList VTs =
+ DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+ N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
+ N->getFailureOrdering(), N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ unsigned ChainOp = N->getNumValues() - 1;
+ ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp));
return Res;
}
@@ -492,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy));
+ Mask = PromoteTargetBoolean(Mask, OpTy);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -892,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
- EVT SVT = getSetCCResultType(MVT::Other);
- SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
@@ -986,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ?
- OpTy.getScalarType() : OpTy);
- Cond = PromoteTargetBoolean(Cond, SVT);
+ EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+ Cond = PromoteTargetBoolean(Cond, OpVT);
return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
N->getOperand(2)), 0);
@@ -1143,6 +1168,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
ReplaceValueWith(SDValue(N, 1), Tmp.second);
break;
}
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue Tmp = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+ N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
+ AN->getSynchScope());
+
+ // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+ // success simply by comparing the loaded value against the ingoing
+ // comparison.
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+ N->getOperand(2), ISD::SETEQ);
+
+ SplitInteger(Tmp, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Success);
+ ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+ break;
+ }
case ISD::AND:
case ISD::OR:
@@ -2301,7 +2346,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0)
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2388,16 +2433,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, VT);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
- N->getOperand(0),
- N->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 3971fc3..bd7dacf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1054,7 +1054,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+///
+/// ValVT is the type of values that produced the boolean.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
SDLoc dl(Bool);
+ EVT BoolVT = getSetCCResultType(ValVT);
ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
- return DAG.getNode(ExtendCode, dl, VT, Bool);
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+ return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e4bbc78..d0ca6f8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -167,7 +167,7 @@ private:
SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -220,7 +220,7 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
- SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
@@ -570,6 +570,7 @@ private:
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
@@ -644,6 +645,7 @@ private:
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f40ed76..7e2f7b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypeExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(InVT) !=
+ TLI.hasBigEndianPartOrdering(OutVT))
+ std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case TargetLowering::TypeSplitVector:
GetSplitVector(InOp, Lo, Hi);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
}
@@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ EVT ValueVT = LD->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
unsigned Alignment = LD->getAlignment();
@@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
Hi.getValue(1));
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OVT))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDLoc dl(N);
StoreSDNode *St = cast<StoreSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
- St->getValue().getValueType());
+ EVT ValueVT = St->getValue().getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
unsigned Alignment = St->getAlignment();
@@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDValue Lo, Hi;
GetExpandedOp(St->getValue(), Lo, Hi);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 898cd29..507e7ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -37,12 +37,12 @@ class VectorLegalizer {
const TargetLowering &TLI;
bool Changed; // Keep track of whether anything changed
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
+ /// For nodes that are of legal width, and that have more than one use, this
+ /// map indicates what regularized operand to use. This allows us to avoid
+ /// legalizing the same thing more than once.
SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
- // Adds a node to the translation cache
+ /// \brief Adds a node to the translation cache.
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
// If someone requests legalization of the new node, return itself.
@@ -50,41 +50,81 @@ class VectorLegalizer {
LegalizedNodes.insert(std::make_pair(To, To));
}
- // Legalizes the given node
+ /// \brief Legalizes the given node.
SDValue LegalizeOp(SDValue Op);
- // Assuming the node is legal, "legalize" the results
+
+ /// \brief Assuming the node is legal, "legalize" the results.
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
- // Implements unrolling a VSETCC.
+
+ /// \brief Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
- // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
- // isn't legal.
- // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
- // SINT_TO_FLOAT and SHR on vectors isn't legal.
+
+ /// \brief Implement expand-based legalization of vector operations.
+ ///
+ /// This is just a high-level routine to dispatch to specific code paths for
+ /// operations to legalize them.
+ SDValue Expand(SDValue Op);
+
+ /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
+ /// FSUB isn't legal.
+ ///
+ /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+ /// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
- // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+
+ /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
- // Expand bswap of vectors into a shuffle if legal.
+
+ /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+ /// type. The contents of the bits in the extended part of each element are
+ /// undef.
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+ /// type, then shifts left and arithmetic shifts right to introduce a sign
+ /// extension.
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
- // Implement vselect in terms of XOR, AND, OR when blend is not supported
- // by the target.
+
+ /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+ /// supported by the target.
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
- // Implements vector promotion; this is essentially just bitcasting the
- // operands to a different type and bitcasting the result back to the
- // original type.
- SDValue PromoteVectorOp(SDValue Op);
- // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
- // operand to the next size up.
- SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
- // Implements FP_TO_[SU]INT vector promotion of the result type; it is
- // promoted to the next size up integer type. The result is then truncated
- // back to the original type.
- SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
-
- public:
+
+ /// \brief Implements vector promotion.
+ ///
+ /// This is essentially just bitcasting the operands to a different type and
+ /// bitcasting the result back to the original type.
+ SDValue Promote(SDValue Op);
+
+ /// \brief Implements [SU]INT_TO_FP vector promotion.
+ ///
+ /// This is a [zs]ext of the input operand to the next size up.
+ SDValue PromoteINT_TO_FP(SDValue Op);
+
+ /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+ ///
+ /// It is promoted to the next size up integer type. The result is then
+ /// truncated back to the original type.
+ SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+
+public:
+ /// \brief Begin legalizer the vector operations in the DAG.
bool Run();
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
@@ -254,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -267,27 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
- switch (Op.getOpcode()) {
- default:
- // "Promote" the operation by bitcasting
- Result = PromoteVectorOp(Op);
- Changed = true;
- break;
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- // "Promote" the operation by extending the operand.
- Result = PromoteVectorOpINT_TO_FP(Op);
- Changed = true;
- break;
- case ISD::FP_TO_UINT:
- case ISD::FP_TO_SINT:
- // Promote the operation by extending the operand.
- Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
- Changed = true;
- break;
- }
+ Result = Promote(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal:
break;
- case TargetLowering::Legal: break;
case TargetLowering::Custom: {
SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
if (Tmp1.getNode()) {
@@ -297,23 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
- Result = ExpandSEXTINREG(Op);
- else if (Node->getOpcode() == ISD::BSWAP)
- Result = ExpandBSWAP(Op);
- else if (Node->getOpcode() == ISD::VSELECT)
- Result = ExpandVSELECT(Op);
- else if (Node->getOpcode() == ISD::SELECT)
- Result = ExpandSELECT(Op);
- else if (Node->getOpcode() == ISD::UINT_TO_FP)
- Result = ExpandUINT_TO_FLOAT(Op);
- else if (Node->getOpcode() == ISD::FNEG)
- Result = ExpandFNEG(Op);
- else if (Node->getOpcode() == ISD::SETCC)
- Result = UnrollVSETCC(Op);
- else
- Result = DAG.UnrollVectorOp(Op.getNode());
- break;
+ Result = Expand(Op);
}
// Make sure that the generated code is itself legal.
@@ -328,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return Result;
}
-SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
- // Vector "promotion" is basically just bitcasting and doing the operation
- // in a different type. For example, x86 promotes ISD::AND on v2i32 to
- // v1i64.
+SDValue VectorLegalizer::Promote(SDValue Op) {
+ // For a few operations there is a specific concept for promotion based on
+ // the operand's type.
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ return PromoteINT_TO_FP(Op);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ }
+
+ // The rest of the time, vector "promotion" is basically just bitcasting and
+ // doing the operation in a different type. For example, x86 promotes
+ // ISD::AND on v2i32 to v1i64.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
@@ -351,7 +375,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
-SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
EVT VT = Op.getOperand(0).getValueType();
@@ -387,7 +411,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assumning that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
@@ -609,6 +633,33 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
return TF;
}
+SDValue VectorLegalizer::Expand(SDValue Op) {
+ switch (Op->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ return ExpandSEXTINREG(Op);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ case ISD::BSWAP:
+ return ExpandBSWAP(Op);
+ case ISD::VSELECT:
+ return ExpandVSELECT(Op);
+ case ISD::SELECT:
+ return ExpandSELECT(Op);
+ case ISD::UINT_TO_FP:
+ return ExpandUINT_TO_FLOAT(Op);
+ case ISD::FNEG:
+ return ExpandFNEG(Op);
+ case ISD::SETCC:
+ return UnrollVSETCC(Op);
+ default:
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+}
+
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
@@ -686,6 +737,85 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.resize(NumSrcElements, -1);
+
+ // Place the extended lanes into the correct locations.
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // First build an any-extend node which can be legalized above when we
+ // recurse through it.
+ Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+
+ // Now we need sign extend. Do this by shifting the elements. Even if these
+ // aren't legal operations, they have a better chance of being legalized
+ // without full scalarization than the sign extension does.
+ unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
+ return DAG.getNode(ISD::SRA, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+ ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build up a zero vector to blend into this one.
+ EVT SrcScalarVT = SrcVT.getScalarType();
+ SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT);
+ SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.reserve(NumSrcElements);
+ for (int i = 0; i < NumSrcElements; ++i)
+ ShuffleMask.push_back(i);
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
EVT VT = Op.getValueType();
@@ -729,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// FIXME: Sign extend 1 to all ones if thats legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
- TLI.getBooleanContents(true) !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(Op1.getValueType()) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
// If the mask and the type are different sizes, unroll the vector op. This
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 368eba3..f77c592 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDValue Cond = GetScalarizedVector(N->getOperand(0));
SDValue LHS = GetScalarizedVector(N->getOperand(1));
- TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
- TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ TargetLowering::BooleanContent ScalarBool =
+ TLI.getBooleanContents(false, false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+ // If integer and float booleans have different contents then we can't
+ // reliably optimize in all cases. There is a full explanation for this in
+ // DAGCombiner::visitSELECT() where the same issue affects folding
+ // (select C, 0, 1) to (xor C, 1).
+ if (TLI.getBooleanContents(false, false) !=
+ TLI.getBooleanContents(false, true)) {
+ // At least try the common case where the boolean is generated by a
+ // comparison.
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT OpVT = Cond->getOperand(0)->getValueType(0);
+ ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+ VecBool = TLI.getBooleanContents(OpVT);
+ } else
+ ScalarBool = TargetLowering::UndefinedBooleanContent;
+ }
+
if (ScalarBool != VecBool) {
EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
@@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
// Vectors may have a different boolean contents to scalars. Promote the
// value appropriately.
ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
@@ -545,6 +563,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -765,6 +784,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
TLI.getVectorIdxTy()));
}
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT SubVecVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr =
+ DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
@@ -1511,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::AND:
- case ISD::BSWAP:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
@@ -1558,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
@@ -2343,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecOp_EXTEND(N);
+ break;
+
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
Res = WidenVecOp_Convert(N);
break;
}
@@ -2372,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue InOp = N->getOperand(0);
+ // If some legalization strategy other than widening is used on the operand,
+ // we can't safely assume that just extending the low lanes is the correct
+ // transformation.
+ if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+ return WidenVecOp_Convert(N);
+ InOp = GetWidenedVector(InOp);
+ assert(VT.getVectorNumElements() <
+ InOp.getValueType().getVectorNumElements() &&
+ "Input wasn't widened!");
+
+ // We may need to further widen the operand until it has the same total
+ // vector size as the result.
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
+ EVT FixedVT = (MVT::SimpleValueType)i;
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+ DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ else
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Extend legalization on on extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ case ISD::SIGN_EXTEND:
+ return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ case ISD::ZERO_EXTEND:
+ return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ }
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index f92230c..624003f 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -442,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
ResCount -= (regPressureDelta(SU) * ScaleTwo);
}
- // These are platform specific things.
+ // These are platform-specific things.
// Will need to go into the back end
// and accessed from here via a hook.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 78ec4df..13cfae7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -170,7 +170,8 @@ public:
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
else
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+ tm.getSubtargetImpl(), this);
}
~ScheduleDAGRRList() {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 51c51d6..4589b0c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -73,7 +73,8 @@ public:
: ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetMachine &tm = mf.getTarget();
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+ tm.getSubtargetImpl(), this);
}
~ScheduleDAGVLIW() {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b1b8035..daff1f2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
#include <cmath>
+
using namespace llvm;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
@@ -147,33 +148,34 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
- unsigned i = 0, e = N->getNumOperands();
-
- // Skip over all of the undef values.
- while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
- ++i;
+ bool IsAllUndef = true;
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ IsAllUndef = false;
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target
+ // and a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all zeros, not whether the individual
+ // constants are.
+ SDValue Zero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ return false;
+ } else
+ return false;
+ }
// Do not accept an all-undef vector.
- if (i == e) return false;
-
- // Do not accept build_vectors that aren't all constants or which have non-0
- // elements.
- SDValue Zero = N->getOperand(i);
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
- if (!CN->isNullValue())
- return false;
- } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
- if (!CFPN->getValueAPF().isPosZero())
- return false;
- } else
+ if (IsAllUndef)
return false;
-
- // Okay, we have at least one 0 value, check to see if the rest match or are
- // undefs.
- for (++i; i != e; ++i)
- if (N->getOperand(i) != Zero &&
- N->getOperand(i).getOpcode() != ISD::UNDEF)
- return false;
return true;
}
@@ -381,6 +383,20 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
}
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw,
+ bool exact) {
+ ID.AddBoolean(nuw);
+ ID.AddBoolean(nsw);
+ ID.AddBoolean(exact);
+}
+
+/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode,
+ bool nuw, bool nsw, bool exact) {
+ if (isBinOpWithFlags(Opcode))
+ AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+}
+
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
@@ -473,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::MUL:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::SHL: {
+ const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
+ AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -527,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
// Add the return value info.
AddNodeIDValueTypes(ID, N->getVTList());
// Add the operand info.
- AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end()));
+ AddNodeIDOperands(ID, N->ops());
// Handle SDNode leafs with special info.
AddNodeIDCustom(ID, N);
@@ -926,6 +956,25 @@ void SelectionDAG::allnodes_clear() {
DeallocateNode(AllNodes.begin());
}
+BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
+ SDVTList VTs, SDValue N1,
+ SDValue N2, bool nuw, bool nsw,
+ bool exact) {
+ if (isBinOpWithFlags(Opcode)) {
+ BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
+ Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ FN->setHasNoUnsignedWrap(nuw);
+ FN->setHasNoSignedWrap(nsw);
+ FN->setIsExact(exact);
+
+ return FN;
+ }
+
+ BinarySDNode *N = new (NodeAllocator)
+ BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ return N;
+}
+
void SelectionDAG::clear() {
allnodes_clear();
OperandAllocator.Reset();
@@ -963,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
-SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) {
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT,
+ EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);
- TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}
@@ -983,6 +1033,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
getConstant(Imm, Op.getValueType()));
}
+SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
@@ -995,7 +1075,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue TrueValue;
- switch (TLI->getBooleanContents(VT.isVector())) {
+ switch (TLI->getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = getConstant(1, VT);
@@ -1190,15 +1270,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee for determining thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
- }
-
unsigned Opc;
- if (GVar && GVar->isThreadLocal())
+ if (GV->isThreadLocal())
Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
else
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
@@ -1454,6 +1527,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
+ // Reset our undef status after accounting for the mask.
+ N2Undef = N2.getOpcode() == ISD::UNDEF;
+ // Re-check whether both sides ended up undef.
+ if (N1.getOpcode() == ISD::UNDEF && N2Undef)
+ return getUNDEF(VT);
// If Identity shuffle return that node.
bool Identity = true;
@@ -1464,9 +1542,36 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return N1;
// Shuffling a constant splat doesn't change the result.
- if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR)
- if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue())
- return N1;
+ if (N2Undef) {
+ SDValue V = N1;
+
+ // Look through any bitcasts. We check that these don't change the number
+ // (and size) of elements and just changes their types.
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ // A splat should always show up as a build vector node.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ // If this is a splat of an undef, shuffling it is also undef.
+ if (Splat && Splat.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // We only have a splat which can skip shuffles if there is a splatted
+ // value and no undef lanes rearranged by the shuffle.
+ if (Splat && UndefElements.none()) {
+ // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+ // number of elements match or the value splatted is a zero constant.
+ if (V.getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements())
+ return N1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+ if (C->isNullValue())
+ return N1;
+ }
+ }
+ }
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
@@ -1692,7 +1797,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETTRUE:
case ISD::SETTRUE2: {
const TargetLowering *TLI = TM.getTargetLowering();
- TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent Cnt =
+ TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
}
@@ -1923,11 +2029,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
- // The boolean result conforms to getBooleanContents. Fall through.
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integer.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::SHL:
@@ -2043,7 +2158,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeKnownBitsLoad(*Ranges, KnownZero);
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
}
break;
}
@@ -2192,8 +2307,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- KnownZero |= ~LowBits;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+
+ // The upper bits are all zero, the lower ones are unchanged.
+ KnownZero = KnownZero2 | ~LowBits;
+ KnownOne = KnownOne2 & LowBits;
break;
}
}
@@ -2323,9 +2441,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents. Fall through.
+ // If setcc returns 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integer.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -2940,7 +3065,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
- SDValue N2) {
+ SDValue N2, bool nuw, bool nsw, bool exact) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3380,22 +3505,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Memoize this node if possible.
- SDNode *N;
+ BinarySDNode *N;
SDVTList VTs = getVTList(VT);
+ const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
if (VT != MVT::Glue) {
- SDValue Ops[] = { N1, N2 };
+ SDValue Ops[] = {N1, N2};
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
+ if (BinOpHasFlags)
+ AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2);
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2);
+
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
AllNodes.push_back(N);
@@ -3583,7 +3711,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT == MVT::f32 || VT == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
@@ -4110,7 +4238,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4166,7 +4294,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4230,7 +4358,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4281,51 +4409,47 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
Ordering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain, SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+SDValue SelectionDAG::getAtomicCmpSwap(
+ unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment, AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
- // For now, atomics are considered to be volatile always.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
unsigned Flags = MachineMemOperand::MOVolatile;
- if (Opcode != ISD::ATOMIC_STORE)
- Flags |= MachineMemOperand::MOLoad;
- if (Opcode != ISD::ATOMIC_LOAD)
- Flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
- assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTs, SDValue Chain, SDValue Ptr,
+ SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
- EVT VT = Cmp.getValueType();
-
- SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering,
- FailureOrdering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -5610,10 +5734,13 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
- ArrayRef<SDValue> Ops) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ ArrayRef<SDValue> Ops, bool nuw, bool nsw,
+ bool exact) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
+ if (isBinOpWithFlags(Opcode))
+ AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return E;
@@ -5960,7 +6087,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
SDNode *N = I++;
- checkForCycles(N);
+ checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
@@ -5980,7 +6107,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// such that by the time the end is reached all nodes will be sorted.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
SDNode *N = I;
- checkForCycles(N);
+ checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
@@ -6005,7 +6132,9 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
#ifndef NDEBUG
SDNode *S = ++I;
dbgs() << "Overran sorted position:\n";
- S->dumprFull();
+ S->dumprFull(this); dbgs() << "\n";
+ dbgs() << "Checking if this is due to cycles\n";
+ checkForCycles(this, true);
#endif
llvm_unreachable(nullptr);
}
@@ -6554,16 +6683,43 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
return true;
}
-ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
- SDValue Op0 = getOperand(0);
- if (Op0.getOpcode() != ISD::Constant)
- return nullptr;
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(getNumOperands());
+ }
+ SDValue Splatted;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ SDValue Op = getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF) {
+ if (UndefElements)
+ (*UndefElements)[i] = true;
+ } else if (!Splatted) {
+ Splatted = Op;
+ } else if (Splatted != Op) {
+ return SDValue();
+ }
+ }
+
+ if (!Splatted) {
+ assert(getOperand(0).getOpcode() == ISD::UNDEF &&
+ "Can only have a splat without a constant for all undefs.");
+ return getOperand(0);
+ }
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
- if (getOperand(i) != Op0)
- return nullptr;
+ return Splatted;
+}
- return cast<ConstantSDNode>(Op0);
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(UndefElements).getNode());
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(UndefElements).getNode());
}
bool BuildVectorSDNode::isConstant() const {
@@ -6591,10 +6747,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
-#ifdef XDEBUG
+#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSet<const SDNode*, 32> &Visited,
- SmallPtrSet<const SDNode*, 32> &Checked) {
+ SmallPtrSet<const SDNode*, 32> &Checked,
+ const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
@@ -6602,29 +6759,37 @@ static void checkForCyclesHelper(const SDNode *N,
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
- dbgs() << "Offending node:\n";
- N->dumprFull();
errs() << "Detected cycle in SelectionDAG\n";
+ dbgs() << "Offending node:\n";
+ N->dumprFull(DAG); dbgs() << "\n";
abort();
}
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG);
Checked.insert(N);
Visited.erase(N);
}
#endif
-void llvm::checkForCycles(const llvm::SDNode *N) {
+void llvm::checkForCycles(const llvm::SDNode *N,
+ const llvm::SelectionDAG *DAG,
+ bool force) {
+#ifndef NDEBUG
+ bool check = force;
#ifdef XDEBUG
- assert(N && "Checking nonexistent SDNode");
- SmallPtrSet<const SDNode*, 32> visited;
- SmallPtrSet<const SDNode*, 32> checked;
- checkForCyclesHelper(N, visited, checked);
-#endif
+ check = true;
+#endif // XDEBUG
+ if (check) {
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked, DAG);
+ }
+#endif // !NDEBUG
}
-void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
- checkForCycles(DAG->getRoot().getNode());
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+ checkForCycles(DAG->getRoot().getNode(), DAG, force);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 070e929..28d8e98 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
@@ -2784,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(OpCode, getCurSDLoc(),
- Op1.getValueType(), Op1, Op2));
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+
+ SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+ Op1, Op2, nuw, nsw, exact);
+ setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
@@ -2816,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(),
- Op1.getValueType(), Op1, Op2));
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+
+ if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ }
+
+ SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+ nuw, nsw, exact);
+ setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
@@ -3570,12 +3601,12 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
if (Before) {
if (Order == AcquireRelease || Order == SequentiallyConsistent)
Order = Release;
- else if (Order == Acquire || Order == Monotonic)
+ else if (Order == Acquire || Order == Monotonic || Order == Unordered)
return Chain;
} else {
if (Order == AcquireRelease)
Order = Acquire;
- else if (Order == Release || Order == Monotonic)
+ else if (Order == Release || Order == Monotonic || Order == Unordered)
return Chain;
}
SDValue Ops[3];
@@ -3598,19 +3629,17 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
DAG, *TLI);
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- getValue(I.getCompareOperand()).getSimpleValueType(),
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getCompareOperand()),
- getValue(I.getNewValOperand()),
- MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
- TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
- Scope);
+ MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+ SDValue L = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
+ 0 /* Alignment */,
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+ TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
- SDValue OutChain = L.getValue(1);
+ SDValue OutChain = L.getValue(2);
if (TLI->getInsertFencesForAtomic())
OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
@@ -5293,7 +5322,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
CLI.setDebugLoc(sdl).setChain(getRoot())
.setCallee(CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -5410,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
MachineBasicBlock *LandingPad) {
+ const TargetLowering *TLI = TM.getTargetLowering();
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
@@ -5420,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
- // Check whether the function can return without sret-demotion.
- SmallVector<ISD::OutputArg, 4> Outs;
- const TargetLowering *TLI = TM.getTargetLowering();
- GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI);
-
- bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(),
- DAG.getMachineFunction(),
- FTy->isVarArg(), Outs,
- FTy->getContext());
-
- SDValue DemoteStackSlot;
- int DemoteStackIdx = -100;
-
- if (!CanLowerReturn) {
- assert(!CS.hasInAllocaArgument() &&
- "sret demotion is incompatible with inalloca");
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
- FTy->getReturnType());
- unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
- FTy->getReturnType());
- MachineFunction &MF = DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
-
- DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy());
- Entry.Node = DemoteStackSlot;
- Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.Alignment = Align;
- Args.push_back(Entry);
- RetTy = Type::getVoidTy(FTy->getContext());
- }
-
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
@@ -5499,58 +5490,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, *TLI))
+ if (isTailCall && !isInTailCallPosition(CS, DAG))
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall);
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);
std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
- if (Result.first.getNode()) {
+ if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
- } else if (!CanLowerReturn && Result.second.getNode()) {
- // The instruction result is the result of loading from the
- // hidden sret parameter.
- SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
-
- ComputeValueVTs(*TLI, PtrRetTy, PVTs);
- assert(PVTs.size() == 1 && "Pointers should fit in one register");
- EVT PtrVT = PVTs[0];
-
- SmallVector<EVT, 4> RetTys;
- SmallVector<uint64_t, 4> Offsets;
- RetTy = FTy->getReturnType();
- ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets);
-
- unsigned NumValues = RetTys.size();
- SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
-
- for (unsigned i = 0; i < NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT,
- DemoteStackSlot,
- DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, false, 1);
- Values[i] = L;
- Chains[i] = L.getValue(1);
- }
-
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, Chains);
- PendingLoads.push_back(Chain);
-
- setValue(CS.getInstruction(),
- DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(RetTys), Values));
- }
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
@@ -6845,7 +6798,7 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs)
+ .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(!CI.use_empty());
const TargetLowering *TLI = TM.getTargetLowering();
@@ -7092,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
}
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
@@ -7100,24 +7068,62 @@ std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
+ Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, CLI.RetTy, RetTys);
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
- MyFlags.ArgVT = VT;
- MyFlags.Used = CLI.IsReturnValueUsed;
- if (CLI.RetSExt)
- MyFlags.Flags.setSExt();
- if (CLI.RetZExt)
- MyFlags.Flags.setZExt();
- if (CLI.IsInReg)
- MyFlags.Flags.setInReg();
- CLI.Ins.push_back(MyFlags);
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this);
+
+ bool CanLowerReturn =
+ this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+ CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+ if (!CanLowerReturn) {
+ // FIXME: equivalent assert?
+ // assert(!CS.hasInAllocaArgument() &&
+ // "sret demotion is incompatible with inalloca");
+ uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy());
+ ArgListEntry Entry;
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.isReturned = false;
+ Entry.Alignment = Align;
+ CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+ } else {
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
}
}
@@ -7260,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
"LowerCall emitted a value with the wrong type!");
});
- // Collect the legal value parts into potentially illegal values
- // that correspond to the original function's return values.
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (CLI.RetSExt)
- AssertOp = ISD::AssertSext;
- else if (CLI.RetZExt)
- AssertOp = ISD::AssertZext;
SmallVector<SDValue, 4> ReturnValues;
- unsigned CurReg = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-
- ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT, nullptr,
- AssertOp));
- CurReg += NumRegs;
- }
-
- // For a function returning void, there is no return value. We can't create
- // such a node, so we just return a null return value in that case. In
- // that case, nothing will actually look at the value.
- if (ReturnValues.empty())
- return std::make_pair(SDValue(), CLI.Chain);
+ if (!CanLowerReturn) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+
+ ComputeValueVTs(*this, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ unsigned NumValues = RetTys.size();
+ ReturnValues.resize(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+ CLI.DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = CLI.DAG.getLoad(
+ RetTys[i], CLI.DL, CLI.Chain, Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
+ false, false, 1);
+ ReturnValues[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+ } else {
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, nullptr,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case. In
+ // that case, nothing will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+ }
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index fb29691..84679f9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -320,7 +320,7 @@ private:
/// 1. Preserve the architecture independence of stack protector generation.
///
/// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform specific stack protector
+ /// OpenBSD for which we support platform-specific stack protector
/// generation.
///
/// The main problem that guided the present solution is that one can not
@@ -338,7 +338,7 @@ private:
/// basic block (where the return inst is placed) and then move it back
/// later at SelectionDAG/MI time before the stack protector check if the
/// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform specific pattern
+ /// immediately since it would require platform-specific pattern
/// matching. The SelectionDAG level option was nixed because
/// SelectionDAG only processes one IR level basic block at a time
/// implying one could not create a DAG Combine to move the callinst.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d6b5255..b3a452f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -55,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PREFETCH: return "Prefetch";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
@@ -220,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND: return "zero_extend";
case ISD::ANY_EXTEND: return "any_extend";
case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 472fc9c..57e22e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+
+// Intrinsic instructions...
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
+STATISTIC(NumFastIselFailSAddWithOverflow,
+ "Fast isel fails on sadd.with.overflow");
+STATISTIC(NumFastIselFailUAddWithOverflow,
+ "Fast isel fails on uadd.with.overflow");
+STATISTIC(NumFastIselFailSSubWithOverflow,
+ "Fast isel fails on ssub.with.overflow");
+STATISTIC(NumFastIselFailUSubWithOverflow,
+ "Fast isel fails on usub.with.overflow");
+STATISTIC(NumFastIselFailSMulWithOverflow,
+ "Fast isel fails on smul.with.overflow");
+STATISTIC(NumFastIselFailUMulWithOverflow,
+ "Fast isel fails on umul.with.overflow");
+STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
+STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
+STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
+STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
#endif
static cl::opt<bool>
@@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) {
case Instruction::FCmp: NumFastIselFailFCmp++; return;
case Instruction::PHI: NumFastIselFailPHI++; return;
case Instruction::Select: NumFastIselFailSelect++; return;
- case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Call: {
+ if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ NumFastIselFailIntrinsicCall++; return;
+ case Intrinsic::sadd_with_overflow:
+ NumFastIselFailSAddWithOverflow++; return;
+ case Intrinsic::uadd_with_overflow:
+ NumFastIselFailUAddWithOverflow++; return;
+ case Intrinsic::ssub_with_overflow:
+ NumFastIselFailSSubWithOverflow++; return;
+ case Intrinsic::usub_with_overflow:
+ NumFastIselFailUSubWithOverflow++; return;
+ case Intrinsic::smul_with_overflow:
+ NumFastIselFailSMulWithOverflow++; return;
+ case Intrinsic::umul_with_overflow:
+ NumFastIselFailUMulWithOverflow++; return;
+ case Intrinsic::frameaddress:
+ NumFastIselFailFrameaddress++; return;
+ case Intrinsic::sqrt:
+ NumFastIselFailSqrt++; return;
+ case Intrinsic::experimental_stackmap:
+ NumFastIselFailStackMap++; return;
+ case Intrinsic::experimental_patchpoint_void: // fall-through
+ case Intrinsic::experimental_patchpoint_i64:
+ NumFastIselFailPatchPoint++; return;
+ }
+ }
+ NumFastIselFailCall++;
+ return;
+ }
case Instruction::Shl: NumFastIselFailShl++; return;
case Instruction::LShr: NumFastIselFailLShr++; return;
case Instruction::AShr: NumFastIselFailAShr++; return;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b75d805..42372a2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -105,7 +105,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(isSigned).setZExtResult(!isSigned);
return LowerCallTo(CLI);
@@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ // Early return, as this function cannot handle vector types.
+ if (Op.getValueType().isVector())
+ return false;
+
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
@@ -1146,18 +1150,21 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
- bool IsVec = false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
- IsVec = true;
- CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.none())
+ return false;
}
- switch (getBooleanContents(IsVec)) {
+ switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
return CN->getAPIntValue()[0];
case ZeroOrOneBooleanContent:
@@ -1173,18 +1180,21 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
- bool IsVec = false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
- IsVec = true;
- CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.none())
+ return false;
}
- if (getBooleanContents(IsVec) == UndefinedBooleanContent)
+ if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
return CN->isNullValue();
@@ -1205,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETFALSE2: return DAG.getConstant(0, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent Cnt =
+ getBooleanContents(N0->getValueType(0));
return DAG.getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
}
@@ -1412,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
NewConst, Cond);
- return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
}
break;
}
@@ -1496,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
} else if (N1C->getAPIntValue() == 1 &&
(VT == MVT::i1 ||
- getBooleanContents(false) == ZeroOrOneBooleanContent)) {
+ getBooleanContents(N0->getValueType(0)) ==
+ ZeroOrOneBooleanContent)) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -1767,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
uint64_t EqVal = 0;
- switch (getBooleanContents(N0.getValueType().isVector())) {
+ switch (getBooleanContents(N0.getValueType())) {
case UndefinedBooleanContent:
case ZeroOrOneBooleanContent:
EqVal = ISD::isTrueWhenEqual(Cond);
@@ -2613,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
if (ShAmt) {
// TODO: For UDIV use SRL instead of SRA.
SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
+ true);
d = d.ashr(ShAmt);
}
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index 1120be8..0e89bad 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -15,8 +15,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
- : DL(TM.getDataLayout()) {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL)
+ : DL(DL) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 4dd87dd..3ba502f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -28,10 +28,9 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
namespace llvm {
-cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
- cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+ cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
}
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
@@ -62,15 +61,17 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
/// Calculate the liveness information for the given machine function.
bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+ if (!EnablePatchPointLiveness)
+ return false;
+
DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
<< _MF.getName() << " **********\n");
MF = &_MF;
TRI = MF->getTarget().getRegisterInfo();
++NumStackMapFuncVisited;
- // Skip this function if there are no stackmaps or patchpoints to process.
- if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
- (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+ // Skip this function if there are no patchpoints to process.
+ if (!MF->getFrameInfo()->hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
@@ -88,13 +89,10 @@ bool StackMapLiveness::calculateLiveness() {
LiveRegs.addLiveOuts(MBBI);
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
- // set to an instruction if we encounter a stackmap or patchpoint
- // instruction.
+ // set to an instruction if we encounter a patchpoint instruction.
for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(),
E = MBBI->rend(); I != E; ++I) {
- int Opc = I->getOpcode();
- if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) ||
- (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) {
+ if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
addLiveOutSetToMI(*I);
HasChanged = true;
HasStackMap = true;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index c3f84c6..83966bd0 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -671,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const {
// Default implementation of CreateTargetRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
+CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
// Dummy hazard recognizer allows all instructions to issue.
return new ScheduleHazardRecognizer();
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2634d71..c574fd4 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
/// InitLibcallNames - Set default libcall names.
///
-static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SHL_I16] = "__ashlhi3";
Names[RTLIB::SHL_I32] = "__ashlsi3";
Names[RTLIB::SHL_I64] = "__ashldi3";
@@ -384,7 +384,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
- if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ if (TT.getEnvironment() == Triple::GNU) {
Names[RTLIB::SINCOS_F32] = "sincosf";
Names[RTLIB::SINCOS_F64] = "sincos";
Names[RTLIB::SINCOS_F80] = "sincosl";
@@ -399,7 +399,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SINCOS_PPCF128] = nullptr;
}
- if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+ if (TT.getOS() != Triple::OpenBSD) {
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
} else {
// These are generally not available.
@@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
+ BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
@@ -702,7 +703,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
SupportJumpTables = true;
MinimumJumpTableEntries = 4;
- InitLibcallNames(LibcallRoutineNames, TM);
+ InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
InitCmpLibcallCCs(CmpLibcallCCs);
InitLibcallCallingConvs(LibcallCallingConvs);
}
@@ -730,6 +731,10 @@ void TargetLoweringBase::initActions() {
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
+ // Most backends expect to see the node which just returns the value loaded.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ (MVT::SimpleValueType)VT, Expand);
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -739,8 +744,15 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
- VT <= MVT::LAST_VECTOR_VALUETYPE)
+ VT <= MVT::LAST_VECTOR_VALUETYPE) {
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ }
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -1080,24 +1092,25 @@ void TargetLoweringBase::computeRegisterProperties() {
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- if (isTypeLegal(VT)) continue;
+ MVT VT = (MVT::SimpleValueType) i;
+ if (isTypeLegal(VT))
+ continue;
- // Determine if there is a legal wider type. If so, we should promote to
- // that wider vector type.
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1 && !shouldSplitVectorType(VT)) {
- bool IsLegalWiderType = false;
- // First try to promote the elements of integer vectors. If no legal
- // promotion was found, fallback to the widen-vector method.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
+ bool IsLegalWiderType = false;
+ LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+ switch (PreferredAction) {
+ case TypePromoteInteger: {
+ // Try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall through to the widen-vector method.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts &&
- isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
+ && SVT.getScalarType().isInteger()) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1106,15 +1119,15 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
-
- if (IsLegalWiderType) continue;
-
+ if (IsLegalWiderType)
+ break;
+ }
+ case TypeWidenVector: {
// Try to widen the vector.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
- if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeLegal(SVT)) {
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT
+ && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1123,27 +1136,34 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
- if (IsLegalWiderType) continue;
+ if (IsLegalWiderType)
+ break;
}
-
- MVT IntermediateVT;
- MVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- MVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- unsigned NumElts = VT.getVectorNumElements();
- ValueTypeActions.setTypeAction(VT,
- NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ case TypeSplitVector:
+ case TypeScalarizeVector: {
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ NumIntermediates, RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ if (PreferredAction == TypeScalarizeVector)
+ ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+ else
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown vector legalization action!");
}
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index dda2259..03f4a51 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -48,16 +48,12 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
- switch (Encoding & 0x70) {
- default:
- report_fatal_error("We do not support this DWARF encoding yet!");
- case dwarf::DW_EH_PE_absptr:
- return TM.getSymbol(GV, Mang);
- case dwarf::DW_EH_PE_pcrel: {
+ if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV, Mang)->getName());
- }
- }
+ if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ return TM.getSymbol(GV, Mang);
+ report_fatal_error("We do not support this DWARF encoding yet!");
}
void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
@@ -196,6 +192,18 @@ getELFSectionFlags(SectionKind K) {
return Flags;
}
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -204,14 +212,20 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (const Comdat *C = getELFComdat(GV)) {
+ Group = C->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind);
+ getELFSectionType(SectionName, Kind), Flags,
+ Kind, /*EntrySize=*/0, Group);
}
/// getSectionPrefixForGlobal - Return the section prefix name used by options
/// FunctionsSections and DataSections.
-static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
if (Kind.isText()) return ".text.";
if (Kind.isReadOnly()) return ".rodata.";
if (Kind.isBSS()) return ".bss.";
@@ -228,7 +242,6 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro.";
}
-
const MCSection *TargetLoweringObjectFileELF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
@@ -242,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
- if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
!Kind.isCommon()) {
- const char *Prefix;
- Prefix = getSectionPrefixForGlobal(Kind);
+ StringRef Prefix = getSectionPrefixForGlobal(Kind);
- SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ SmallString<128> Name(Prefix);
TM.getNameWithPrefix(Name, GV, Mang, true);
StringRef Group = "";
unsigned Flags = getELFSectionFlags(Kind);
- if (GV->isWeakForLinker()) {
- Group = Name.substr(strlen(Prefix));
+ if (GV->isWeakForLinker() || GV->hasComdat()) {
+ if (const Comdat *C = getELFComdat(GV))
+ Group = C->getName();
+ else
+ Group = Name.substr(Prefix.size());
Flags |= ELF::SHF_GROUP;
}
@@ -340,7 +355,7 @@ getSectionForConstant(SectionKind Kind) const {
}
const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ unsigned Priority, const MCSymbol *KeySym) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -360,7 +375,7 @@ const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
}
const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ unsigned Priority, const MCSymbol *KeySym) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -487,6 +502,15 @@ emitModuleFlags(MCStreamer &Streamer,
Streamer.AddBlankLine();
}
+static void checkMachOComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return;
+
+ report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() +
+ "' cannot be lowered.");
+}
+
const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -494,6 +518,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
+
+ checkMachOComdat(GV);
+
std::string ErrorCode =
MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
TAA, TAAParsed, StubSize);
@@ -564,6 +591,7 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
+ checkMachOComdat(GV);
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
@@ -732,6 +760,50 @@ getCOFFSectionFlags(SectionKind K) {
return Flags;
}
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ assert(C && "expected GV to have a Comdat!");
+
+ StringRef ComdatGVName = C->getName();
+ const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+ if (!ComdatGV)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' does not exist.");
+
+ if (ComdatGV->getComdat() != C)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' is not a key for it's COMDAT.");
+
+ return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+ if (const Comdat *C = GV->getComdat()) {
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+ if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+ ComdatKey = GA->getBaseObject();
+ if (ComdatKey == GV) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ case Comdat::ExactMatch:
+ return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+ case Comdat::Largest:
+ return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+ case Comdat::NoDuplicates:
+ return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ case Comdat::SameSize:
+ return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+ }
+ } else {
+ return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+ }
+ } else if (GV->isWeakForLinker()) {
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ }
+ return 0;
+}
+
const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -739,11 +811,21 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
unsigned Characteristics = getCOFFSectionFlags(Kind);
StringRef Name = GV->getSection();
StringRef COMDATSymName = "";
- if (GV->isWeakForLinker()) {
- Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
- Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
- COMDATSymName = Sym->getName();
+ if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+ Selection = getSelectionForCOFF(GV);
+ const GlobalValue *ComdatGV;
+ if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ COMDATSymName = Sym->getName();
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ } else {
+ Selection = 0;
+ }
}
return getContext().getCOFFSection(Name,
Characteristics,
@@ -780,17 +862,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// into a 'uniqued' section name, create and return the section now.
// Section names depend on the name of the symbol which is not feasible if the
// symbol has private linkage.
- if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !GV->hasPrivateLinkage() && !Kind.isCommon()) {
+ if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
+ !Kind.isCommon()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
- return getContext().getCOFFSection(
- Name, Characteristics, Kind, Sym->getName(),
- GV->isWeakForLinker() ? COFF::IMAGE_COMDAT_SELECT_ANY
- : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES);
+ int Selection = getSelectionForCOFF(GV);
+ if (!Selection)
+ Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ const GlobalValue *ComdatGV;
+ if (GV->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ StringRef COMDATSymName = Sym->getName();
+ return getContext().getCOFFSection(Name, Characteristics, Kind,
+ COMDATSymName, Selection);
+ }
}
if (Kind.isText())
@@ -868,8 +960,7 @@ emitModuleFlags(MCStreamer &Streamer,
static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
const MCSection *Sec,
- const MCSymbol *KeySym,
- const MCSection *KeySec) {
+ const MCSymbol *KeySym) {
// Return the normal section if we don't have to be associative.
if (!KeySym)
return Sec;
@@ -877,20 +968,19 @@ static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
// Make an associative section with the same name and kind as the normal
// section.
const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
- const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec);
unsigned Characteristics =
SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
SecCOFF->getKind(), KeySym->getName(),
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF);
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
- return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec);
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
- return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec);
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
}