aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-12-01 14:51:49 -0800
committerStephen Hines <srhines@google.com>2014-12-02 16:08:10 -0800
commit37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib/CodeGen
parentd2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
downloadexternal_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp34
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h65
-rw-r--r--lib/CodeGen/AllocationOrder.h4
-rw-r--r--lib/CodeGen/Analysis.cpp51
-rw-r--r--lib/CodeGen/Android.mk7
-rw-r--r--lib/CodeGen/AntiDepBreaker.h24
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.h4
-rw-r--r--lib/CodeGen/AsmPrinter/Android.mk1
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp364
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp34
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterHandler.h4
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp11
-rw-r--r--lib/CodeGen/AsmPrinter/ByteStreamer.h4
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt1
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h8
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.h4
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp97
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h6
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocEntry.h144
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocList.h8
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h12
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp862
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h250
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1128
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h203
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h37
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.cpp60
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.h45
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.cpp8
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.h13
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp668
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h228
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp18
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h4
-rw-r--r--lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.h52
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp167
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h4
-rw-r--r--lib/CodeGen/AtomicExpandLoadLinkedPass.cpp380
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp563
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp39
-rw-r--r--lib/CodeGen/BranchFolding.cpp92
-rw-r--r--lib/CodeGen/BranchFolding.h28
-rw-r--r--lib/CodeGen/CMakeLists.txt9
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp12
-rw-r--r--lib/CodeGen/CallingConvLower.cpp14
-rw-r--r--lib/CodeGen/CodeGen.cpp3
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp795
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp51
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h27
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp21
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp22
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp8
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp14
-rw-r--r--lib/CodeGen/ErlangGC.cpp3
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp9
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp4
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp7
-rw-r--r--lib/CodeGen/ForwardControlFlowIntegrity.cpp374
-rw-r--r--lib/CodeGen/GCMetadata.cpp2
-rw-r--r--lib/CodeGen/GCStrategy.cpp5
-rw-r--r--lib/CodeGen/GlobalMerge.cpp5
-rw-r--r--lib/CodeGen/IfConversion.cpp30
-rw-r--r--lib/CodeGen/InlineSpiller.cpp68
-rw-r--r--lib/CodeGen/InterferenceCache.h4
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp33
-rw-r--r--lib/CodeGen/JITCodeEmitter.cpp14
-rw-r--r--lib/CodeGen/JumpInstrTables.cpp17
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp65
-rw-r--r--lib/CodeGen/LexicalScopes.cpp11
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp74
-rw-r--r--lib/CodeGen/LiveDebugVariables.h6
-rw-r--r--lib/CodeGen/LiveInterval.cpp2
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp16
-rw-r--r--lib/CodeGen/LiveRangeCalc.h4
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp7
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp3
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp5
-rw-r--r--lib/CodeGen/LiveVariables.cpp262
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp9
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp48
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp9
-rw-r--r--lib/CodeGen/MachineCSE.cpp35
-rw-r--r--lib/CodeGen/MachineCodeEmitter.cpp14
-rw-r--r--lib/CodeGen/MachineCombiner.cpp435
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp5
-rw-r--r--lib/CodeGen/MachineDominanceFrontier.cpp54
-rw-r--r--lib/CodeGen/MachineDominators.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp132
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp3
-rw-r--r--lib/CodeGen/MachineInstr.cpp131
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp19
-rw-r--r--lib/CodeGen/MachineLICM.cpp35
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp8
-rw-r--r--lib/CodeGen/MachineRegionInfo.cpp140
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp27
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp4
-rw-r--r--lib/CodeGen/MachineScheduler.cpp43
-rw-r--r--lib/CodeGen/MachineSink.cpp177
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp76
-rw-r--r--lib/CodeGen/MachineVerifier.cpp77
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp7
-rw-r--r--lib/CodeGen/PHIElimination.cpp10
-rw-r--r--lib/CodeGen/PHIEliminationUtils.h4
-rw-r--r--lib/CodeGen/Passes.cpp26
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp795
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp64
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp5
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp47
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h4
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp8
-rw-r--r--lib/CodeGen/RegAllocBase.cpp3
-rw-r--r--lib/CodeGen/RegAllocBase.h6
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp3
-rw-r--r--lib/CodeGen/RegAllocFast.cpp36
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp30
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp777
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp7
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp108
-rw-r--r--lib/CodeGen/RegisterCoalescer.h50
-rw-r--r--lib/CodeGen/RegisterPressure.cpp3
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp131
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp9
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp28
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp1
-rw-r--r--lib/CodeGen/ScoreboardHazardRecognizer.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp2257
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp1339
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp117
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h5
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp453
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp125
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp68
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h18
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp66
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp96
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp50
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h58
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp32
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp80
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp38
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h4
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp477
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp1021
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h43
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp147
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp132
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp2
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp53
-rw-r--r--lib/CodeGen/SpillPlacement.cpp54
-rw-r--r--lib/CodeGen/SpillPlacement.h13
-rw-r--r--lib/CodeGen/Spiller.cpp184
-rw-r--r--lib/CodeGen/Spiller.h9
-rw-r--r--lib/CodeGen/SplitKit.cpp33
-rw-r--r--lib/CodeGen/SplitKit.h4
-rw-r--r--lib/CodeGen/StackColoring.cpp2
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp4
-rw-r--r--lib/CodeGen/StackMaps.cpp32
-rw-r--r--lib/CodeGen/StackProtector.cpp9
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp4
-rw-r--r--lib/CodeGen/TailDuplication.cpp9
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp6
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp102
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp87
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp158
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp7
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp11
-rw-r--r--lib/CodeGen/TargetSchedule.cpp33
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp28
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp11
-rw-r--r--lib/CodeGen/VirtRegMap.cpp13
182 files changed, 11740 insertions, 7292 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 0f38c64..91c1314 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -111,18 +110,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
}
-
-
-AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi,
- const RegisterClassInfo &RCI,
- TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
- AntiDepBreaker(), MF(MFi),
- MRI(MF.getRegInfo()),
- TII(MF.getTarget().getInstrInfo()),
- TRI(MF.getTarget().getRegisterInfo()),
- RegClassInfo(RCI),
- State(nullptr) {
+AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ State(nullptr) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -262,11 +256,8 @@ static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
- unsigned Reg = P->getReg();
- if (RegSet.count(Reg) == 0) {
+ if (RegSet.insert(P->getReg()).second)
Edges.push_back(&*P);
- RegSet.insert(Reg);
- }
}
}
}
@@ -527,7 +518,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
BV &= RCBV;
}
- DEBUG(dbgs() << " " << RC->getName());
+ DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
}
return BV;
@@ -582,7 +573,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned Reg = Regs[i];
if (Reg == SuperReg) continue;
bool IsSub = TRI->isSubRegister(SuperReg, Reg);
- assert(IsSub && "Expecting group subregister");
+ // FIXME: remove this once PR18663 has been properly fixed. For now,
+ // return a conservative answer:
+ // assert(IsSub && "Expecting group subregister");
if (!IsSub)
return false;
}
@@ -618,8 +611,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG(dbgs() << "\tFind Registers:");
- if (RenameOrder.count(SuperRC) == 0)
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
unsigned OrigR = RenameOrder[SuperRC];
unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 2ab9d89..12cf95b 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
-#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
@@ -32,88 +32,83 @@
namespace llvm {
class RegisterClassInfo;
- /// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
public:
- /// RegisterReference - Information about a register reference
- /// within a liverange
+ /// Information about a register reference within a liverange
typedef struct {
- /// Operand - The registers operand
+ /// The registers operand
MachineOperand *Operand;
- /// RC - The register class
+ /// The register class
const TargetRegisterClass *RC;
} RegisterReference;
private:
- /// NumTargetRegs - Number of non-virtual target registers
- /// (i.e. TRI->getNumRegs()).
+ /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
const unsigned NumTargetRegs;
- /// GroupNodes - Implements a disjoint-union data structure to
+ /// Implements a disjoint-union data structure to
/// form register groups. A node is represented by an index into
/// the vector. A node can "point to" itself to indicate that it
/// is the parent of a group, or point to another node to indicate
/// that it is a member of the same group as that node.
std::vector<unsigned> GroupNodes;
- /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// For each register, the index of the GroupNode
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
std::vector<unsigned> GroupNodeIndices;
- /// RegRefs - Map registers to all their references within a live range.
+ /// Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
+ /// The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceding bottom
+ /// The index of the most recent complete def (proceding bottom
/// up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
- /// GetKillIndices - Return the kill indices.
+ /// Return the kill indices.
std::vector<unsigned> &GetKillIndices() { return KillIndices; }
- /// GetDefIndices - Return the define indices.
+ /// Return the define indices.
std::vector<unsigned> &GetDefIndices() { return DefIndices; }
- /// GetRegRefs - Return the RegRefs map.
+ /// Return the RegRefs map.
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
- // GetGroup - Get the group for a register. The returned value is
+ // Get the group for a register. The returned value is
// the index of the GroupNode representing the group.
unsigned GetGroup(unsigned Reg);
- // GetGroupRegs - Return a vector of the registers belonging to a
- // group. If RegRefs is non-NULL then only included referenced registers.
+ // Return a vector of the registers belonging to a group.
+ // If RegRefs is non-NULL then only included referenced registers.
void GetGroupRegs(
unsigned Group,
std::vector<unsigned> &Regs,
std::multimap<unsigned,
AggressiveAntiDepState::RegisterReference> *RegRefs);
- // UnionGroups - Union Reg1's and Reg2's groups to form a new
- // group. Return the index of the GroupNode representing the
- // group.
+ // Union Reg1's and Reg2's groups to form a new group.
+ // Return the index of the GroupNode representing the group.
unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
- // LeaveGroup - Remove a register from its current group and place
+ // Remove a register from its current group and place
// it alone in its own group. Return the index of the GroupNode
// representing the registers new group.
unsigned LeaveGroup(unsigned Reg);
- /// IsLive - Return true if Reg is live
+ /// Return true if Reg is live.
bool IsLive(unsigned Reg);
};
- /// Class AggressiveAntiDepBreaker
class AggressiveAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
@@ -121,12 +116,11 @@ class RegisterClassInfo;
const TargetRegisterInfo *TRI;
const RegisterClassInfo &RegClassInfo;
- /// CriticalPathSet - The set of registers that should only be
+ /// The set of registers that should only be
/// renamed if they are on the critical path.
BitVector CriticalPathSet;
- /// State - The state used to identify and rename anti-dependence
- /// registers.
+ /// The state used to identify and rename anti-dependence registers.
AggressiveAntiDepState *State;
public:
@@ -135,11 +129,10 @@ class RegisterClassInfo;
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
- /// path
+ /// Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
@@ -148,24 +141,24 @@ class RegisterClassInfo;
unsigned InsertPosIndex,
DbgValueVector &DbgValues) override;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) override;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
void FinishBlock() override;
private:
/// Keep track of a position in the allocation order for each regclass.
typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
- /// IsImplicitDefUse - Return true if MO represents a register
+ /// Return true if MO represents a register
/// that is both implicitly used and defined in MI
bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
- /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// If MI implicitly def/uses a register, then
/// return that register and all subregisters.
void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 64ff2a7..1e4eaa7 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
-#define LLVM_CODEGEN_ALLOCATIONORDER_H
+#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 1bdf312..9a3b790 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -25,6 +25,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -106,15 +109,16 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
}
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+GlobalValue *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
- if (GV && GV->getName() == "llvm.eh.catch.all.value") {
- assert(GV->hasInitializer() &&
+ if (Var && Var->getName() == "llvm.eh.catch.all.value") {
+ assert(Var->hasInitializer() &&
"The EH catch-all value must have an initializer");
- Value *Init = GV->getInitializer();
- GV = dyn_cast<GlobalVariable>(Init);
+ Value *Init = Var->getInitializer();
+ GV = dyn_cast<GlobalValue>(Init);
if (!GV) V = cast<ConstantPointerNull>(Init);
}
@@ -475,7 +479,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,8 +494,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
- !isa<UnreachableInst>(Term)))
+ (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
@@ -509,8 +512,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
return false;
}
- return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
- *DAG.getTarget().getTargetLowering());
+ return returnTypeIsEligibleForTailCall(
+ ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
@@ -607,3 +610,29 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
+
+bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+ if (!GV->hasLinkOnceODRLinkage())
+ return false;
+
+ if (GV->hasUnnamedAddr())
+ return true;
+
+ // If it is a non constant variable, it needs to be uniqued across shared
+ // objects.
+ if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
+ if (!Var->isConstant())
+ return false;
+ }
+
+ // An alias can point to a variable. We could try to resolve the alias to
+ // decide, but for now just don't hide them.
+ if (isa<GlobalAlias>(GV))
+ return false;
+
+ GlobalStatus GS;
+ if (GlobalStatus::analyzeGlobal(GV, GS))
+ return false;
+
+ return !GS.IsCompared;
+}
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 05e5c45..5cb351d 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -4,7 +4,7 @@ codegen_SRC_FILES := \
AggressiveAntiDepBreaker.cpp \
AllocationOrder.cpp \
Analysis.cpp \
- AtomicExpandLoadLinkedPass.cpp \
+ AtomicExpandPass.cpp \
BasicTargetTransformInfo.cpp \
BranchFolding.cpp \
CalcSpillWeights.cpp \
@@ -21,6 +21,7 @@ codegen_SRC_FILES := \
ExecutionDepsFix.cpp \
ExpandISelPseudos.cpp \
ExpandPostRAPseudos.cpp \
+ ForwardControlFlowIntegrity.cpp \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCStrategy.cpp \
@@ -29,7 +30,6 @@ codegen_SRC_FILES := \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
- JITCodeEmitter.cpp \
JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
@@ -49,7 +49,7 @@ codegen_SRC_FILES := \
MachineBlockFrequencyInfo.cpp \
MachineBlockPlacement.cpp \
MachineBranchProbabilityInfo.cpp \
- MachineCodeEmitter.cpp \
+ MachineCombiner.cpp \
MachineCopyPropagation.cpp \
MachineCSE.cpp \
MachineDominators.cpp \
@@ -97,7 +97,6 @@ codegen_SRC_FILES := \
ShadowStackGC.cpp \
SjLjEHPrepare.cpp \
SlotIndexes.cpp \
- Spiller.cpp \
SpillPlacement.cpp \
SplitKit.cpp \
StackColoring.cpp \
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index df47f98..a61a8ef 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
-#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +25,8 @@
namespace llvm {
-/// AntiDepBreaker - This class works into conjunction with the
-/// post-RA scheduler to rename registers to break register
-/// anti-dependencies.
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
class AntiDepBreaker {
public:
typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
@@ -35,29 +34,26 @@ public:
virtual ~AntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
virtual void StartBlock(MachineBasicBlock *BB) =0;
- /// BreakAntiDependencies - Identifiy anti-dependencies within a
- /// basic-block region and break them by renaming registers. Return
- /// the number of anti-dependencies broken.
- ///
+ /// Identifiy anti-dependencies within a basic-block region and break them by
+ /// renaming registers. Return the number of anti-dependencies broken.
virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) = 0;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- ///
virtual void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) =0;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
virtual void FinishBlock() =0;
- /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating
+ /// Update DBG_VALUE if dependency breaker is updating
/// other machine instruction to use NewReg.
void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 251f5ef..66c6c63 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -108,7 +108,7 @@ void ARMException::endFunction(const MachineFunction *) {
}
void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
@@ -121,9 +121,9 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ for (std::vector<const GlobalValue *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalVariable *GV = *I;
+ const GlobalValue *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 42757d7..802e050 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
-#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index 083cc0d..cb8e96a 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -11,6 +11,7 @@ codegen_asmprinter_SRC_FILES := \
DIEHash.cpp \
DwarfAccelTable.cpp \
DwarfCFIException.cpp \
+ DwarfCompileUnit.cpp \
DwarfDebug.cpp \
DwarfFile.cpp \
DwarfStringPool.cpp \
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f80fdea..8a32713 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,11 +14,13 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "Win64Exception.h"
#include "WinCodeViewLineTables.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -49,7 +51,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -98,11 +99,10 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
}
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : MachineFunctionPass(ID),
- TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
- OutContext(Streamer.getContext()),
- OutStreamer(Streamer),
- LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
+ MII(tm.getSubtargetImpl()->getInstrInfo()),
+ OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr),
+ LastFn(0), Counter(~0U), SetCounter(0) {
DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
CurrentFnSym = CurrentFnSymForSize = nullptr;
GCMetadataPrinters = nullptr;
@@ -129,12 +129,12 @@ unsigned AsmPrinter::getFunctionNumber() const {
}
const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
- return TM.getTargetLowering()->getObjFileLowering();
+ return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
}
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getDataLayout();
+ return *TM.getSubtargetImpl()->getDataLayout();
}
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
@@ -173,9 +173,9 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
- OutStreamer.InitSections();
+ OutStreamer.InitSections(false);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
// Emit the version-min deplyment target directive if needed.
//
@@ -222,14 +222,12 @@ bool AsmPrinter::doInitialization(Module &M) {
}
if (MAI->doesSupportDebugInformation()) {
- if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
+ if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment())
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
- } else {
- DD = new DwarfDebug(this, &M);
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
- }
+ DD = new DwarfDebug(this, &M);
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
}
EHStreamer *ES = nullptr;
@@ -243,8 +241,13 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::ARM:
ES = new ARMException(this);
break;
- case ExceptionHandling::WinEH:
- ES = new Win64Exception(this);
+ case ExceptionHandling::ItaniumWinEH:
+ switch (MAI->getWinEHEncodingType()) {
+ default: llvm_unreachable("unsupported unwinding information encoding");
+ case WinEH::EncodingType::Itanium:
+ ES = new Win64Exception(this);
+ break;
+ }
break;
}
if (ES)
@@ -253,33 +256,10 @@ bool AsmPrinter::doInitialization(Module &M) {
}
static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
- GlobalValue::LinkageTypes Linkage = GV->getLinkage();
- if (Linkage != GlobalValue::LinkOnceODRLinkage)
- return false;
-
if (!MAI.hasWeakDefCanBeHiddenDirective())
return false;
- if (GV->hasUnnamedAddr())
- return true;
-
- // This is only used for MachO, so right now it doesn't really matter how
- // we handle alias. Revisit this once the MachO linker implements aliases.
- if (isa<GlobalAlias>(GV))
- return false;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- GlobalStatus GS;
- if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
- return true;
-
- return false;
+ return canBeOmittedFromSymbolTable(GV);
}
void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
@@ -361,7 +341,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -578,20 +558,24 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
- if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI,
+ FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
- } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot(
+ &MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
- } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE(
+ &MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
- } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot(
+ &MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -605,8 +589,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer.AddComment(Twine("implicit-def: ") +
- TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(
+ Twine("implicit-def: ") +
+ TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
OutStreamer.AddBlankLine();
}
@@ -616,7 +601,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
Str += ' ';
- Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg());
Str += (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer.AddComment(Str);
@@ -627,21 +612,27 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
/// of DBG_VALUE, returning true if it was able to do so. A false return
/// means the target will need to handle MI in EmitInstruction.
static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
- // This code handles only the 3-operand target-independent form.
- if (MI->getNumOperands() != 3)
+ // This code handles only the 4-operand target-independent form.
+ if (MI->getNumOperands() != 4)
return false;
SmallString<128> Str;
raw_svector_ostream OS(Str);
OS << "DEBUG_VALUE: ";
- DIVariable V(MI->getOperand(2).getMetadata());
+ DIVariable V = MI->getDebugVariable();
if (V.getContext().isSubprogram()) {
StringRef Name = DISubprogram(V.getContext()).getDisplayName();
if (!Name.empty())
OS << Name << ":";
}
- OS << V.getName() << " <- ";
+ OS << V.getName();
+
+ DIExpression Expr = MI->getDebugExpression();
+ if (Expr.isVariablePiece())
+ OS << " [piece offset=" << Expr.getPieceOffset()
+ << " size=" << Expr.getPieceSize() << "]";
+ OS << " <- ";
// The second operand is only an offset if it's an immediate.
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
@@ -672,7 +663,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
Reg = MI->getOperand(0).getReg();
} else {
assert(MI->getOperand(0).isFI() && "Unknown operand type");
- const TargetFrameLowering *TFI = AP.TM.getFrameLowering();
+ const TargetFrameLowering *TFI =
+ AP.TM.getSubtargetImpl()->getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
Deref = true;
@@ -686,7 +678,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (Deref)
OS << '[';
- OS << AP.TM.getRegisterInfo()->getName(Reg);
+ OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg);
}
if (Deref)
@@ -709,8 +701,8 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
- MF->getFunction()->needsUnwindTableEntry();
+ return MAI->getExceptionHandlingType() == ExceptionHandling::ItaniumWinEH &&
+ MF->getFunction()->needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -722,9 +714,6 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
- if (MMI->getCompactUnwindEncoding() != 0)
- OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
-
const MachineModuleInfo &MMI = MF->getMMI();
const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
@@ -742,12 +731,10 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
- const MachineInstr *LastMI = nullptr;
for (auto &MBB : *MF) {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
for (auto &MI : MBB) {
- LastMI = &MI;
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -804,26 +791,22 @@ void AsmPrinter::EmitFunctionBody() {
}
}
}
- }
- // If the last instruction was a prolog label, then we have a situation where
- // we emitted a prolog but no function body. This results in the ending prolog
- // label equaling the end of function label and an invalid "row" in the
- // FDE. We need to emit a noop in this situation so that the FDE's rows are
- // valid.
- bool RequiresNoop = LastMI && LastMI->isCFIInstruction();
+ EmitBasicBlockEnd(MBB);
+ }
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
MCInst Noop;
- TM.getInstrInfo()->getNoopForMachoTarget(Noop);
- if (Noop.getOpcode()) {
- OutStreamer.AddComment("avoids zero-length function");
+ TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop);
+ OutStreamer.AddComment("avoids zero-length function");
+
+ // Targets can opt-out of emitting the noop here by leaving the opcode
+ // unspecified.
+ if (Noop.getOpcode())
OutStreamer.EmitInstruction(Noop, getSubtargetInfo());
- } else // Target not mc-ized yet.
- OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
const Function *F = MF->getFunction();
@@ -895,17 +878,18 @@ bool AsmPrinter::doFinalization(Module &M) {
unsigned Arch = Triple(getTargetTriple()).getArch();
bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
MCInst TrapInst;
- TM.getInstrInfo()->getTrap(TrapInst);
+ TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst);
+ unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
+
+ // Emit the right section for these functions.
+ OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
for (const auto &KV : JITI->getTables()) {
uint64_t Count = 0;
for (const auto &FunPair : KV.second) {
// Emit the function labels to make this be a function entry point.
MCSymbol *FunSym =
OutContext.GetOrCreateSymbol(FunPair.second->getName());
- OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
- // FIXME: JumpTableInstrInfo should store information about the required
- // alignment of table entries and the size of the padding instruction.
- EmitAlignment(3);
+ EmitAlignment(LogAlignment);
if (IsThumb)
OutStreamer.EmitThumbFunc(FunSym);
if (MAI->hasDotTypeDotSizeDirective())
@@ -920,16 +904,16 @@ bool AsmPrinter::doFinalization(Module &M) {
const MCSymbolRefExpr *TargetSymRef =
MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
OutContext);
- TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+ TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch(
+ JumpToFun, TargetSymRef);
OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
++Count;
}
// Emit enough padding instructions to fill up to the next power of two.
- // This assumes that the trap instruction takes 8 bytes or fewer.
uint64_t Remaining = NextPowerOf2(Count) - Count;
for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(3);
+ EmitAlignment(LogAlignment);
OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
}
@@ -976,24 +960,21 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
- if (MAI->hasSetDirective()) {
- OutStreamer.AddBlankLine();
- for (const auto &Alias : M.aliases()) {
- MCSymbol *Name = getSymbol(&Alias);
+ OutStreamer.AddBlankLine();
+ for (const auto &Alias : M.aliases()) {
+ MCSymbol *Name = getSymbol(&Alias);
- if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
- else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
+ if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
- EmitVisibility(Name, Alias.getVisibility());
+ EmitVisibility(Name, Alias.getVisibility());
- // Emit the directives as assignments aka .set:
- OutStreamer.EmitAssignment(Name,
- lowerConstant(Alias.getAliasee(), *this));
- }
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee(), *this));
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -1062,23 +1043,14 @@ void AsmPrinter::EmitConstantPool() {
const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
- SectionKind Kind;
- switch (CPE.getRelocationInfo()) {
- default: llvm_unreachable("Unknown section kind");
- case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
- case 1:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case 0:
- switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
- case 4: Kind = SectionKind::getMergeableConst4(); break;
- case 8: Kind = SectionKind::getMergeableConst8(); break;
- case 16: Kind = SectionKind::getMergeableConst16();break;
- default: Kind = SectionKind::getMergeableConst(); break;
- }
- }
+ SectionKind Kind =
+ CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+
+ const Constant *C = nullptr;
+ if (!CPE.isMachineConstantPoolEntry())
+ C = CPE.Val.ConstVal;
- const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
// The number of sections are small, just do a linear search from the
// last section to the first.
@@ -1101,13 +1073,22 @@ void AsmPrinter::EmitConstantPool() {
}
// Now print stuff into the calculated sections.
+ const MCSection *CurSection = nullptr;
+ unsigned Offset = 0;
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
- OutStreamer.SwitchSection(CPSections[i].S);
- EmitAlignment(Log2_32(CPSections[i].Alignment));
-
- unsigned Offset = 0;
for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
unsigned CPI = CPSections[i].CPEs[j];
+ MCSymbol *Sym = GetCPISymbol(CPI);
+ if (!Sym->isUndefined())
+ continue;
+
+ if (CurSection != CPSections[i].S) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+ CurSection = CPSections[i].S;
+ Offset = 0;
+ }
+
MachineConstantPoolEntry CPE = CP[CPI];
// Emit inter-object padding for alignment.
@@ -1116,9 +1097,10 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer.EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
- Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
- OutStreamer.EmitLabel(GetCPISymbol(CPI));
+ Offset = NewOffset +
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
@@ -1131,7 +1113,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getTarget().getDataLayout();
+ const DataLayout *DL = MF->getSubtarget().getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1156,12 +1138,14 @@ void AsmPrinter::EmitJumpTableInfo() {
} else {
// Otherwise, drop it in the readonly section.
const MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
+ /*C=*/nullptr);
OutStreamer.SwitchSection(ReadOnlySection);
JTInDiffSection = true;
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+ EmitAlignment(Log2_32(
+ MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout())));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -1174,17 +1158,17 @@ void AsmPrinter::EmitJumpTableInfo() {
// If this jump table was deleted, ignore it.
if (JTBBs.empty()) continue;
- // For the EK_LabelDifference32 entry, if the target supports .set, emit a
- // .set directive for each unique entry. This reduces the number of
- // relocations the assembler will generate for the jump table.
+ // For the EK_LabelDifference32 entry, if using .set avoids a relocation,
+ /// emit a .set directive for each unique entry.
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
- MAI->hasSetDirective()) {
+ MAI->doesSetDirectiveSuppressesReloc()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
const MachineBasicBlock *MBB = JTBBs[ii];
- if (!EmittedSets.insert(MBB)) continue;
+ if (!EmittedSets.insert(MBB).second)
+ continue;
// .set LJTSet, LBB32-base
const MCExpr *LHS =
@@ -1223,8 +1207,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
- Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
- OutContext);
+ Value =
+ TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
break;
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -1250,34 +1235,30 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
}
case MachineJumpTableInfo::EK_LabelDifference32: {
- // EK_LabelDifference32 - Each entry is the address of the block minus
- // the address of the jump table. This is used for PIC jump tables where
- // gprel32 is not supported. e.g.:
+ // Each entry is the address of the block minus the address of the jump
+ // table. This is used for PIC jump tables where gprel32 is not supported.
+ // e.g.:
// .word LBB123 - LJTI1_2
- // If the .set directive is supported, this is emitted as:
+ // If the .set directive avoids relocations, this is emitted as:
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
-
- // If we have emitted set directives for the jump table entries, print
- // them rather than the entries themselves. If we're emitting PIC, then
- // emit the table entries as differences between two text section labels.
- if (MAI->hasSetDirective()) {
- // If we used .set, reference the .set's symbol.
+ if (MAI->doesSetDirectiveSuppressesReloc()) {
Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
OutContext);
break;
}
- // Otherwise, use the difference as the jump table entry.
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
- const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
- Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
break;
}
}
assert(Value && "Unknown entry kind!");
- unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
+ unsigned EntrySize =
+ MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout());
OutStreamer.EmitValue(Value, EntrySize);
}
@@ -1387,7 +1368,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
@@ -1450,9 +1431,9 @@ void AsmPrinter::EmitInt32(int Value) const {
OutStreamer.EmitIntValue(Value, 4);
}
-/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
-/// in bytes of the directive is specified by Size and Hi/Lo specify the
-/// labels. This implicitly uses .set if it is available.
+/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
+/// .set if it avoids relocations.
void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
// Get the Hi-Lo expression.
@@ -1461,7 +1442,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
MCSymbolRefExpr::Create(Lo, OutContext),
OutContext);
- if (!MAI->hasSetDirective()) {
+ if (!MAI->doesSetDirectiveSuppressesReloc()) {
OutStreamer.EmitValue(Diff, Size);
return;
}
@@ -1472,36 +1453,6 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
-/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
-/// where the size in bytes of the directive is specified by Size and Hi/Lo
-/// specify the labels. This implicitly uses .set if it is available.
-void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
- const MCSymbol *Lo,
- unsigned Size) const {
-
- // Emit Hi+Offset - Lo
- // Get the Hi+Offset expression.
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
- MCConstantExpr::Create(Offset, OutContext),
- OutContext);
-
- // Get the Hi+Offset-Lo expression.
- const MCExpr *Diff =
- MCBinaryExpr::CreateSub(Plus,
- MCSymbolRefExpr::Create(Lo, OutContext),
- OutContext);
-
- if (!MAI->hasSetDirective())
- OutStreamer.EmitValue(Diff, Size);
- else {
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
- OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, Size);
- }
-}
-
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
@@ -1531,7 +1482,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// if required for correctness.
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
- if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
+ if (GV)
+ NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(),
+ NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1577,8 +1530,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(
+ CE, AP.TM.getSubtargetImpl()->getDataLayout()))
if (C != CE)
return lowerConstant(C, AP);
@@ -1592,7 +1545,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
@@ -1616,7 +1569,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
@@ -1626,7 +1579,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -1699,7 +1652,8 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64) return -1;
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1743,7 +1697,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
+ uint64_t Bytes =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value);
@@ -1793,7 +1749,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1808,7 +1764,9 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
int Value = isRepeatedByteSequence(CA, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
+ uint64_t Bytes =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value);
}
else {
@@ -1821,7 +1779,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1831,7 +1789,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
@@ -1881,7 +1839,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() &&
+ if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() &&
!CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
@@ -1900,13 +1858,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -1952,7 +1910,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Emit the extra bits after the 64-bits chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType());
+ uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
@@ -1962,7 +1921,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
@@ -2027,7 +1986,8 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
@@ -2056,7 +2016,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
/// temporary label with the specified stem and unique ID.
MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
Name + Twine(ID));
}
@@ -2064,7 +2024,7 @@ MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
/// GetTempSymbol - Return an assembler temporary label with the specified
/// stem.
MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Name);
}
@@ -2080,7 +2040,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
@@ -2094,7 +2054,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 02cd12b..05f6a68 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -130,7 +131,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getDataLayout()->getPointerSize();
+ return TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -214,13 +215,10 @@ static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset,
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
}
-/// Emit a dwarf register operation for describing
-/// - a small value occupying only part of a register or
-/// - a small register representing only part of a value.
-static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
- unsigned OffsetInBits) {
- assert(SizeInBits > 0 && "zero-sized piece");
- unsigned SizeOfByte = 8;
+void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
+ unsigned OffsetInBits) const {
+ assert(SizeInBits > 0 && "piece has size zero");
+ const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece");
Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits));
@@ -249,13 +247,13 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) const {
assert(MLoc.isReg() && "MLoc must be a register");
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
emitDwarfRegOp(Streamer, Reg);
- emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
+ EmitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
return;
}
@@ -266,19 +264,19 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
if (Reg >= 0) {
unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg());
unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+ unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
OutStreamer.AddComment("super-register");
emitDwarfRegOp(Streamer, Reg);
- if (PieceOffsetInBits == Offset) {
- emitDwarfOpPiece(Streamer, Size, Offset);
+ if (PieceOffsetInBits == RegOffset) {
+ EmitDwarfOpPiece(Streamer, Size, RegOffset);
} else {
// If this is part of a variable in a sub-register at a
// non-zero offset, we need to manually shift the value into
// place, since the DW_OP_piece describes the part of the
// variable, not the position of the subregister.
- emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
- if (Offset)
- emitDwarfOpShr(Streamer, Offset);
+ if (RegOffset)
+ emitDwarfOpShr(Streamer, RegOffset);
+ EmitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
}
return;
}
@@ -312,7 +310,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
if (Reg >= 0 && Intersection.any()) {
OutStreamer.AddComment("sub-register");
emitDwarfRegOp(Streamer, Reg);
- emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
+ EmitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
CurPos = Offset + Size;
// Mark it as emitted.
@@ -331,7 +329,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
const MachineLocation &MLoc,
bool Indirect) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
// We assume that pointers are always in an addressable register.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 2825367..31867dd 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
-#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 46ee0c8..cca5f22 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -110,14 +111,14 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
HasDiagHandler = true;
}
- MemoryBuffer *Buffer;
+ std::unique_ptr<MemoryBuffer> Buffer;
if (isNullTerminated)
Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
else
Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
- SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+ SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
@@ -146,6 +147,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
+ if (MF) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
+ }
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
@@ -500,7 +505,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
if (!strcmp(Code, "private")) {
OS << DL->getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 6c01d65..0cc8353 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_BYTESTREAMER_H
-#define LLVM_CODEGEN_BYTESTREAMER_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index f555f21..e6b7d64 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
DIEHash.cpp
DwarfAccelTable.cpp
DwarfCFIException.cpp
+ DwarfCompileUnit.cpp
DwarfDebug.cpp
DwarfFile.cpp
DwarfStringPool.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index c3dcd9c..50ea369 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "DIE.h"
+
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
@@ -370,6 +373,29 @@ void DIEString::print(raw_ostream &O) const {
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
+/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
+/// directive is specified by Size and Hi/Lo specify the labels.
+static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
+ uint64_t Offset, const MCSymbol *Lo,
+ unsigned Size) {
+ MCContext &Context = Streamer.getContext();
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
+ MCConstantExpr::Create(Offset, Context), Context);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff = MCBinaryExpr::CreateSub(
+ Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = Context.CreateTempSymbol();
+ Streamer.EmitAssignment(SetLabel, Diff);
+ Streamer.EmitSymbolValue(SetLabel, Size);
+}
+
/// EmitValue - Emit debug information entry offset.
///
void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
@@ -388,9 +414,9 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
DIEEntry::getRefAddrSize(AP));
else
- AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr,
- CU->getSectionSym(),
- DIEEntry::getRefAddrSize(AP));
+ emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
+ CU->getSectionSym(),
+ DIEEntry::getRefAddrSize(AP));
} else
AP->EmitInt32(Entry.getOffset());
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index ef05f17..e310aef 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DIE_H__
-#define CODEGEN_ASMPRINTER_DIE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -381,10 +381,10 @@ public:
///
class DIEString : public DIEValue {
const DIEValue *Access;
- const StringRef Str;
+ StringRef Str;
public:
- DIEString(const DIEValue *Acc, const StringRef S)
+ DIEString(const DIEValue *Acc, StringRef S)
: DIEValue(isString), Access(Acc), Str(S) {}
/// getString - Grab the string out of the object.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index c2fad59..b2a3ba8 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -261,7 +261,7 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
return;
}
- // otherwise, b) use the letter 'T' as a the marker, ...
+ // otherwise, b) use the letter 'T' as the marker, ...
addULEB128('T');
addULEB128(Attribute);
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 175d660..872aa0e 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__
-#define CODEGEN_ASMPRINTER_DIEHASH_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index a66d08e..0c2a5e5 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -8,25 +8,25 @@
//===----------------------------------------------------------------------===//
#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
-#include <set>
+using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-namespace llvm {
-
// \brief If @MI is a DBG_VALUE with debug value described by a
// defined register, returns the number of this register.
// In the other case, returns 0.
static unsigned isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
- assert(MI.getNumOperands() == 3);
+ assert(MI.getNumOperands() == 4);
// If location of variable is described using a register (directly or
// indirecltly), this register is always a first operand.
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
@@ -36,7 +36,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
- assert(MI.isDebugValue() && MI.getDebugVariable() == Var);
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
auto &Ranges = VarInstrRanges[Var];
if (!Ranges.empty() && Ranges.back().second == nullptr &&
Ranges.back().first->isIdenticalTo(&MI)) {
@@ -96,6 +96,19 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars,
VarSet.push_back(Var);
}
+// \brief Terminate the location range for variables described by register at
+// @I by inserting @ClobberingInstr to their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
+ RegDescribedVarsMap::iterator I,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ // Iterate over all variables described by this register and add this
+ // instruction to their history, clobbering it.
+ for (const auto &Var : I->second)
+ HistMap.endInstrRange(Var, ClobberingInstr);
+ RegVars.erase(I);
+}
+
// \brief Terminate the location range for variables described by register
// @RegNo by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
@@ -104,22 +117,26 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
const auto &I = RegVars.find(RegNo);
if (I == RegVars.end())
return;
- // Iterate over all variables described by this register and add this
- // instruction to their history, clobbering it.
- for (const auto &Var : I->second)
- HistMap.endInstrRange(Var, ClobberingInstr);
- RegVars.erase(I);
+ clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
}
-// \brief Collect all registers clobbered by @MI and insert them to @Regs.
-static void collectClobberedRegisters(const MachineInstr &MI,
+// \brief Collect all registers clobbered by @MI and apply the functor
+// @Func to their RegNo.
+// @Func should be a functor with a void(unsigned) signature. We're
+// not using std::function here for performance reasons. It has a
+// small but measurable impact. By using a functor instead of a
+// std::set& here, we can avoid the overhead of constructing
+// temporaries in calculateDbgValueHistory, which has a significant
+// performance impact.
+template<typename Callable>
+static void applyToClobberedRegisters(const MachineInstr &MI,
const TargetRegisterInfo *TRI,
- std::set<unsigned> &Regs) {
+ Callable Func) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg())
continue;
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Regs.insert(*AI);
+ Func(*AI);
}
}
@@ -133,11 +150,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
// as the return instruction.
DebugLoc LastLoc = LastMI->getDebugLoc();
auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
- ++I) {
+ for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)),
+ E = MBB.rend();
+ I != E; ++I) {
if (I->getDebugLoc() != LastLoc)
return Res;
- Res = std::prev(I.base());
+ Res = &*I;
}
// If all instructions have the same debug location, assume whole MBB is
// an epilogue.
@@ -145,25 +163,26 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
}
// \brief Collect registers that are modified in the function body (their
-// contents is changed only in the prologue and epilogue).
+// contents is changed outside of the prologue and epilogue).
static void collectChangingRegs(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
- std::set<unsigned> &Regs) {
+ BitVector &Regs) {
for (const auto &MBB : *MF) {
auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
- bool IsInEpilogue = false;
+
for (const auto &MI : MBB) {
- IsInEpilogue |= &MI == FirstEpilogueInst;
- if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
- collectClobberedRegisters(MI, TRI, Regs);
+ if (&MI == FirstEpilogueInst)
+ break;
+ if (!MI.getFlag(MachineInstr::FrameSetup))
+ applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); });
}
}
}
-void calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result) {
- std::set<unsigned> ChangingRegs;
+void llvm::calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result) {
+ BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
RegDescribedVarsMap RegVars;
@@ -172,17 +191,18 @@ void calculateDbgValueHistory(const MachineFunction *MF,
if (!MI.isDebugValue()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
- std::set<unsigned> MIClobberedRegs;
- collectClobberedRegisters(MI, TRI, MIClobberedRegs);
- for (unsigned RegNo : MIClobberedRegs) {
- if (ChangingRegs.count(RegNo))
+ applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) {
+ if (ChangingRegs.test(RegNo))
clobberRegisterUses(RegVars, RegNo, Result, MI);
- }
+ });
continue;
}
assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
- const MDNode *Var = MI.getDebugVariable();
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as index into History. The full variables including the
+ // piece expressions are attached to the MI.
+ DIVariable Var = MI.getDebugVariable();
if (unsigned PrevReg = Result.getRegisterForVar(Var))
dropRegDescribedVar(RegVars, PrevReg, Var);
@@ -196,11 +216,12 @@ void calculateDbgValueHistory(const MachineFunction *MF,
// Make sure locations for register-described variables are valid only
// until the end of the basic block (unless it's the last basic block, in
// which case let their liveness run off to the end of the function).
- if (!MBB.empty() && &MBB != &MF->back()) {
- for (unsigned RegNo : ChangingRegs)
- clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
+ auto CurElem = I++; // CurElem can be erased below.
+ if (ChangingRegs.test(CurElem->first))
+ clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ }
}
}
}
-
-}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index b9177f0..4b62007 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
-#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,9 +28,11 @@ class DbgValueHistoryMap {
// range. If end is not specified, location is valid until the start
// instruction of the next instruction range, or until the end of the
// function.
+public:
typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
typedef SmallVector<InstrRange, 4> InstrRanges;
typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+private:
InstrRangesMap VarInstrRanges;
public:
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 3beb799..6cca985 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
-#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCSymbol.h"
namespace llvm {
-class DwarfCompileUnit;
class MDNode;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
@@ -26,25 +26,30 @@ class DebugLocEntry {
public:
/// A single location or constant.
struct Value {
- Value(const MDNode *Var, int64_t i)
- : Variable(Var), EntryKind(E_Integer) {
+ Value(const MDNode *Var, const MDNode *Expr, int64_t i)
+ : Variable(Var), Expression(Expr), EntryKind(E_Integer) {
Constant.Int = i;
}
- Value(const MDNode *Var, const ConstantFP *CFP)
- : Variable(Var), EntryKind(E_ConstantFP) {
+ Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP)
+ : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) {
Constant.CFP = CFP;
}
- Value(const MDNode *Var, const ConstantInt *CIP)
- : Variable(Var), EntryKind(E_ConstantInt) {
+ Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP)
+ : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) {
Constant.CIP = CIP;
}
- Value(const MDNode *Var, MachineLocation Loc)
- : Variable(Var), EntryKind(E_Location), Loc(Loc) {
+ Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
+ : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(DIVariable(Var).Verify());
+ assert(DIExpression(Expr).Verify());
}
// The variable to which this location entry corresponds.
const MDNode *Variable;
+ // Any complex address location expression for this Value.
+ const MDNode *Expression;
+
// Type of entry that this represents.
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
enum EntryType EntryKind;
@@ -59,23 +64,6 @@ public:
// Or a location in the machine frame.
MachineLocation Loc;
- bool operator==(const Value &other) const {
- if (EntryKind != other.EntryKind)
- return false;
-
- switch (EntryKind) {
- case E_Location:
- return Loc == other.Loc;
- case E_Integer:
- return Constant.Int == other.Constant.Int;
- case E_ConstantFP:
- return Constant.CFP == other.Constant.CFP;
- case E_ConstantInt:
- return Constant.CIP == other.Constant.CIP;
- }
- llvm_unreachable("unhandled EntryKind");
- }
-
bool isLocation() const { return EntryKind == E_Location; }
bool isInt() const { return EntryKind == E_Integer; }
bool isConstantFP() const { return EntryKind == E_ConstantFP; }
@@ -84,40 +72,114 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- const MDNode *getVariable() const { return Variable; }
+ const MDNode *getVariableNode() const { return Variable; }
+ DIVariable getVariable() const { return DIVariable(Variable); }
+ bool isVariablePiece() const { return getExpression().isVariablePiece(); }
+ DIExpression getExpression() const { return DIExpression(Expression); }
+ friend bool operator==(const Value &, const Value &);
+ friend bool operator<(const Value &, const Value &);
};
+
private:
- /// A list of locations/constants belonging to this entry.
+ /// A nonempty list of locations/constants belonging to this entry,
+ /// sorted by offset.
SmallVector<Value, 1> Values;
- /// The compile unit that this location entry is referenced by.
- const DwarfCompileUnit *Unit;
-
public:
- DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {}
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E,
- Value Val, const DwarfCompileUnit *U)
- : Begin(B), End(E), Unit(U) {
+ DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
+ : Begin(B), End(E) {
Values.push_back(std::move(Val));
}
+ /// \brief If this and Next are describing different pieces of the same
+ // variable, merge them by appending Next's values to the current
+ // list of values.
+ // Return true if the merge was successful.
+ bool MergeValues(const DebugLocEntry &Next) {
+ if (Begin == Next.Begin) {
+ DIExpression Expr(Values[0].Expression);
+ DIVariable Var(Values[0].Variable);
+ DIExpression NextExpr(Next.Values[0].Expression);
+ DIVariable NextVar(Next.Values[0].Variable);
+ if (Var == NextVar && Expr.isVariablePiece() &&
+ NextExpr.isVariablePiece()) {
+ addValues(Next.Values);
+ End = Next.End;
+ return true;
+ }
+ }
+ return false;
+ }
+
/// \brief Attempt to merge this DebugLocEntry with Next and return
/// true if the merge was successful. Entries can be merged if they
/// share the same Loc/Constant and if Next immediately follows this
/// Entry.
- bool Merge(const DebugLocEntry &Next) {
+ bool MergeRanges(const DebugLocEntry &Next) {
+ // If this and Next are describing the same variable, merge them.
if ((End == Next.Begin && Values == Next.Values)) {
End = Next.End;
return true;
}
return false;
}
+
const MCSymbol *getBeginSym() const { return Begin; }
const MCSymbol *getEndSym() const { return End; }
- const DwarfCompileUnit *getCU() const { return Unit; }
- const ArrayRef<Value> getValues() const { return Values; }
- void addValue(Value Val) { Values.push_back(Val); }
+ ArrayRef<Value> getValues() const { return Values; }
+ void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+ Values.append(Vals.begin(), Vals.end());
+ sortUniqueValues();
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
+ return V.isVariablePiece();
+ }) && "value must be a piece");
+ }
+
+ // Sort the pieces by offset.
+ // Remove any duplicate entries by dropping all but the first.
+ void sortUniqueValues() {
+ std::sort(Values.begin(), Values.end());
+ Values.erase(std::unique(Values.begin(), Values.end(),
+ [](const Value &A, const Value &B) {
+ return A.getVariable() == B.getVariable() &&
+ A.getExpression() == B.getExpression();
+ }),
+ Values.end());
+ }
};
+/// Compare two Values for equality.
+inline bool operator==(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ if (A.EntryKind != B.EntryKind)
+ return false;
+
+ if (A.Expression != B.Expression)
+ return false;
+
+ if (A.Variable != B.Variable)
+ return false;
+
+ switch (A.EntryKind) {
+ case DebugLocEntry::Value::E_Location:
+ return A.Loc == B.Loc;
+ case DebugLocEntry::Value::E_Integer:
+ return A.Constant.Int == B.Constant.Int;
+ case DebugLocEntry::Value::E_ConstantFP:
+ return A.Constant.CFP == B.Constant.CFP;
+ case DebugLocEntry::Value::E_ConstantInt:
+ return A.Constant.CIP == B.Constant.CIP;
+ }
+ llvm_unreachable("unhandled EntryKind");
+}
+
+/// Compare two pieces based on their offset.
+inline bool operator<(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ return A.getExpression().getPieceOffset() <
+ B.getExpression().getPieceOffset();
}
+
+}
+
#endif
diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h
index 7a51c7b..2a4f58f 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocList.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocList.h
@@ -7,16 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
-#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
-#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/SmallVector.h"
#include "DebugLocEntry.h"
namespace llvm {
+class DwarfCompileUnit;
+class MCSymbol;
struct DebugLocList {
MCSymbol *Label;
+ DwarfCompileUnit *CU;
SmallVector<DebugLocEntry, 4> List;
};
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index e9527c4..7e87566 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -13,6 +13,7 @@
#include "DwarfAccelTable.h"
#include "DIE.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
@@ -174,7 +175,8 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
+ MCSymbol *StrSym) {
uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -183,13 +185,14 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset((*HI)->Data.StrSym,
- D->getStringPool().getSectionSymbol());
+ Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
- Asm->EmitInt32(HD->Die->getOffset());
+ DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit());
+ assert(CU && "Accelerated DIE should belong to a CU.");
+ Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset());
// If we have multiple Atoms emit that info too.
// FIXME: A bit of a hack, we either emit only one atom or all info.
if (HeaderData.Atoms.size() > 1) {
@@ -206,7 +209,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
}
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
+ MCSymbol *StrSym) {
// Emit the header.
EmitHeader(Asm);
@@ -220,7 +224,7 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
EmitOffsets(Asm, SecBegin);
// Emit the hash data.
- EmitData(Asm, D);
+ EmitData(Asm, D, StrSym);
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index a3cc95f..3cdf678 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
-#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
@@ -62,7 +62,7 @@
namespace llvm {
class AsmPrinter;
-class DwarfFile;
+class DwarfDebug;
class DwarfAccelTable {
@@ -140,7 +140,7 @@ public:
private:
struct TableHeaderData {
uint32_t die_offset_base;
- SmallVector<Atom, 1> Atoms;
+ SmallVector<Atom, 3> Atoms;
TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
: die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
@@ -223,7 +223,7 @@ private:
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfFile *D);
+ void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -248,7 +248,7 @@ public:
void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
- void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 74215aa..0dc52da 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -51,7 +51,7 @@ void DwarfCFIException::endModule() {
if (moveTypeModule == AsmPrinter::CFI_M_Debug)
Asm->OutStreamer.EmitCFISections(false, true);
- if (!Asm->MAI->isExceptionHandlingDwarf())
+ if (!Asm->MAI->usesItaniumLSDAForExceptions())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 0000000..2f1b0e5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,862 @@
+#include "DwarfCompileUnit.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
+ AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU)
+ : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
+ Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) {
+ insertDIE(Node, &getUnitDie());
+}
+
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+
+ // Don't use the address pool in non-fission or in the skeleton unit itself.
+ // FIXME: Once GDB supports this, it's probably worthwhile using the address
+ // pool from the skeleton - maybe even in non-fission (possibly fewer
+ // relocations by sharing them in the pool, but we have other ideas about how
+ // to reduce the number of relocations as well/instead).
+ if (!DD->useSplitDwarf() || !Skeleton)
+ return addLocalLabelAddress(Die, Attribute, Label);
+
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ unsigned idx = DD->getAddressPool().getIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+}
+
+void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
+ dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ Die.addValue(Attribute, dwarf::DW_FORM_addr,
+ Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
+ : new (DIEValueAllocator) DIEInteger(0));
+}
+
+unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If we print assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+
+ // FIXME: add a better feature test than hasRawTextSupport. Even better,
+ // extend .file to support this.
+ return Asm->OutStreamer.EmitDwarfFileDirective(
+ 0, DirName, FileName,
+ Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return nullptr;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return nullptr;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return nullptr;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return nullptr;
+
+ return CE;
+}
+
+/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(GV))
+ return Die;
+
+ assert(GV.isGlobalVariable());
+
+ DIScope GVContext = DD->resolve(GV.getContext());
+ DIType GTy = DD->resolve(GV.getType());
+
+ // Construct the context before querying for the existence of the DIE in
+ // case such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+
+ // Add to map.
+ DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+ DIScope DeclContext;
+
+ if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) {
+ DeclContext = resolve(SDMDecl.getContext());
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ assert(GV.isDefinition());
+ // We need the declaration DIE that is in the static member's class.
+ DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
+ addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ } else {
+ DeclContext = resolve(GV.getContext());
+ // Add name and type.
+ addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(*VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit())
+ addFlag(*VariableDIE, dwarf::DW_AT_external);
+
+ // Add line number info.
+ addSourceLine(*VariableDIE, GV);
+ }
+
+ if (!GV.isDefinition())
+ addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+
+ // Add location.
+ bool addToAccelTable = false;
+ bool isGlobalVariable = GV.getGlobal() != nullptr;
+ if (isGlobalVariable) {
+ addToAccelTable = true;
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
+ if (GV.getGlobal()->isThreadLocal()) {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by a custom OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ }
+
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ // Add the linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty())
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(*VariableDIE,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ addConstantValue(*VariableDIE, CI, GTy);
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ Value *Ptr = CE->getOperand(0);
+ MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ }
+
+ if (addToAccelTable) {
+ DD->addAccelName(GV.getName(), *VariableDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ DD->addAccelName(GV.getLinkageName(), *VariableDIE);
+ }
+
+ addGlobalName(GV.getName(), *VariableDIE, DeclContext);
+ return VariableDIE;
+}
+
+void DwarfCompileUnit::addRange(RangeSpan Range) {
+ bool SameAsPrevCU = this == DD->getPrevCU();
+ DD->setPrevCU(this);
+ // If we have no current ranges just add the range and return, otherwise,
+ // check the current section and CU against the previous section and CU we
+ // emitted into and the subprogram was contained within. If these are the
+ // same then extend our current range, otherwise add this as a new range.
+ if (CURanges.empty() || !SameAsPrevCU ||
+ (&CURanges.back().getEnd()->getSection() !=
+ &Range.getEnd()->getSection())) {
+ CURanges.push_back(Range);
+ return;
+ }
+
+ CURanges.back().setEnd(Range.getEnd());
+}
+
+void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ else
+ addSectionDelta(Die, Attribute, Label, Sec);
+}
+
+void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym =
+ Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
+
+ stmtListIndex = UnitDie.getValues().size();
+
+ // DW_AT_stmt_list is a offset of line number information for this
+ // compile unit in debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ DwarfLineSectionSym);
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
+ D.addValue(dwarf::DW_AT_stmt_list,
+ UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
+ UnitDie.getValues()[stmtListIndex]);
+}
+
+void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
+ const MCSymbol *End) {
+ assert(Begin && "Begin label should not be null!");
+ assert(End && "End label should not be null!");
+ assert(Begin->isDefined() && "Invalid starting label");
+ assert(End->isDefined() && "Invalid end label");
+
+ addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+ if (DD->getDwarfVersion() < 4)
+ addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+ else
+ addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+
+ attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym());
+ if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
+ *DD->getCurrentFunction()))
+ addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ // Only include DW_AT_frame_base in full debug info
+ if (!includeMinimalInlineScopes()) {
+ const TargetRegisterInfo *RI =
+ Asm->TM.getSubtargetImpl()->getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ DD->addSubprogramNames(SP, *SPDie);
+
+ return *SPDie;
+}
+
+// Construct a DIE for this scope.
+void DwarfCompileUnit::constructScopeDIE(
+ LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren) {
+ if (!Scope || !Scope->getScopeNode())
+ return;
+
+ DIScope DS(Scope->getScopeNode());
+
+ assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+ "Only handle inlined subprograms here, use "
+ "constructSubprogramScopeDIE for non-inlined "
+ "subprograms");
+
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
+
+ // We try to create the scope DIE first, then the children DIEs. This will
+ // avoid creating un-used children then removing them later when we find out
+ // the scope DIE is null.
+ std::unique_ptr<DIE> ScopeDIE;
+ if (Scope->getParent() && DS.isSubprogram()) {
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ if (!ScopeDIE)
+ return;
+ // We create children when the scope DIE is not null.
+ createScopeChildrenDIE(Scope, Children);
+ } else {
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ unsigned ChildScopeCount;
+
+ // We create children here when we know the scope DIE is not going to be
+ // null and the children will be added to the scope DIE.
+ createScopeChildrenDIE(Scope, Children, &ChildScopeCount);
+
+ // Skip imported directives in gmlt-like data.
+ if (!includeMinimalInlineScopes()) {
+ // There is no need to emit empty lexical block DIE.
+ for (const auto &E : DD->findImportedEntitiesForScope(DS))
+ Children.push_back(
+ constructImportedEntityDIE(DIImportedEntity(E.second)));
+ }
+
+ // If there are only other scopes as children, put them directly in the
+ // parent instead, as this scope would serve no purpose.
+ if (Children.size() == ChildScopeCount) {
+ FinalChildren.insert(FinalChildren.end(),
+ std::make_move_iterator(Children.begin()),
+ std::make_move_iterator(Children.end()));
+ return;
+ }
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
+ }
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE->addChild(std::move(I));
+
+ FinalChildren.push_back(std::move(ScopeDIE));
+}
+
+void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Value);
+}
+
+void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
+ SmallVector<RangeSpan, 2> Range) {
+ // Emit offset in .debug_range as a relocatable label. emitDIE will handle
+ // emitting it appropriately.
+ auto *RangeSectionSym = DD->getRangeSectionSym();
+
+ RangeSpanList List(
+ Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()),
+ std::move(Range));
+
+ // Under fission, ranges are specified by constant offsets relative to the
+ // CU's DW_AT_GNU_ranges_base.
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+
+ // Add the range list to the set of ranges to be emitted.
+ (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
+ if (Ranges.size() == 1) {
+ const auto &single = Ranges.front();
+ attachLowHighPC(Die, single.getStart(), single.getEnd());
+ } else
+ addScopeRangeList(Die, std::move(Ranges));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
+ SmallVector<RangeSpan, 2> List;
+ List.reserve(Ranges.size());
+ for (const InsnRange &R : Ranges)
+ List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
+ DD->getLabelAfterInsn(R.second)));
+ attachRangesOrLowHighPC(Die, std::move(List));
+}
+
+// This scope represents inlined body of a function. Construct DIE to
+// represent this concrete inlined copy of the function.
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
+ assert(Scope->getScopeNode());
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
+ assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
+
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
+ addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ // Add the call site information to the DIE.
+ DILocation DL(Scope->getInlinedAt());
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
+ if (DD->isLexicalScopeDIENull(Scope))
+ return nullptr;
+
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ return ScopeDIE;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
+ bool Abstract) {
+ auto D = constructVariableDIEImpl(DV, Abstract);
+ DV.setDIE(*D);
+ return D;
+}
+
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract) {
+ // Define variable debug information entry.
+ auto VariableDie = make_unique<DIE>(DV.getTag());
+
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV.getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV.getMInsn()) {
+ assert(DVInsn->getNumOperands() == 4);
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ // If the second operand is an immediate, this is an indirect value.
+ if (DVInsn->getOperand(1).isImm()) {
+ MachineLocation Location(RegOp.getReg(),
+ DVInsn->getOperand(1).getImm());
+ addVariableAddress(DV, *VariableDie, Location);
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+ } else if (DVInsn->getOperand(0).isImm())
+ addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
+ DV.getType());
+
+ return VariableDie;
+ }
+
+ // .. else use frame index.
+ int FI = DV.getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI =
+ Asm->TM.getSubtargetImpl()->getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, *VariableDie, Location);
+ }
+
+ return VariableDie;
+}
+
+std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(
+ DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) {
+ auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
+ if (DV.isObjectPointer())
+ ObjectPointer = Var.get();
+ return Var;
+}
+
+DIE *DwarfCompileUnit::createScopeChildrenDIE(
+ LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+ unsigned *ChildScopeCount) {
+ DIE *ObjectPointer = nullptr;
+
+ for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
+ Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ unsigned ChildCountWithoutScopes = Children.size();
+
+ for (LexicalScope *LS : Scope->getChildren())
+ constructScopeDIE(LS, Children);
+
+ if (ChildScopeCount)
+ *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
+
+ return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ DISubprogram Sub(Scope->getScopeNode());
+
+ assert(Sub.isSubprogram());
+
+ DD->getProcessedSPNodes().insert(Sub);
+
+ DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+
+ // If this is a variadic function, add an unspecified parameter.
+ DITypeArray FnArgs = Sub.getType().getTypeArray();
+
+ // Collect lexical scope children first.
+ // ObjectPointer might be a local (non-argument) local variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+ // If we have a single element of null, it is a function that returns void.
+ // If we have more than one elements and the last one is null, it is a
+ // variadic function.
+ if (FnArgs.getNumElements() > 1 &&
+ !FnArgs.getElement(FnArgs.getNumElements() - 1) &&
+ !includeMinimalInlineScopes())
+ ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
+}
+
+DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
+ DIE &ScopeDIE) {
+ // We create children when the scope DIE is not null.
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
+ DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE.addChild(std::move(I));
+
+ return ObjectPointer;
+}
+
+void
+DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+ DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
+ if (AbsDef)
+ return;
+
+ DISubprogram SP(Scope->getScopeNode());
+
+ DIE *ContextDIE;
+
+ if (includeMinimalInlineScopes())
+ ContextDIE = &getUnitDie();
+ // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+ // the important distinction that the DIDescriptor is not associated with the
+ // DIE (since the DIDescriptor will be associated with the concrete DIE, if
+ // any). It could be refactored to some common utility function.
+ else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ ContextDIE = &getUnitDie();
+ getOrCreateSubprogramDIE(SPDecl);
+ } else
+ ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+
+ // Passing null as the associated DIDescriptor because the abstract definition
+ // shouldn't be found by lookup.
+ AbsDef =
+ &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor());
+ applySubprogramAttributesToDefinition(SP, *AbsDef);
+
+ if (!includeMinimalInlineScopes())
+ addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef))
+ addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+}
+
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) {
+ assert(Module.Verify() &&
+ "Use one of the MDNode * overloads to handle invalid metadata");
+ std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag());
+ insertDIE(Module, IMDie.get());
+ DIE *EntityDie;
+ DIDescriptor Entity = resolve(Module.getEntity());
+ if (Entity.isNameSpace())
+ EntityDie = getOrCreateNameSpace(DINameSpace(Entity));
+ else if (Entity.isSubprogram())
+ EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity));
+ else if (Entity.isType())
+ EntityDie = getOrCreateTypeDIE(DIType(Entity));
+ else if (Entity.isGlobalVariable())
+ EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity));
+ else
+ EntityDie = getDIE(Entity);
+ assert(EntityDie);
+ addSourceLine(*IMDie, Module.getLineNumber(),
+ Module.getContext().getFilename(),
+ Module.getContext().getDirectory());
+ addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
+ StringRef Name = Module.getName();
+ if (!Name.empty())
+ addString(*IMDie, dwarf::DW_AT_name, Name);
+
+ return IMDie;
+}
+
+void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) {
+ DIE *D = getDIE(SP);
+ if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
+ if (D)
+ // If this subprogram has an abstract definition, reference that
+ addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+ } else {
+ if (!D && !includeMinimalInlineScopes())
+ // Lazily construct the subprogram if we didn't see either concrete or
+ // inlined versions during codegen. (except in -gmlt ^ where we want
+ // to omit these entirely)
+ D = getOrCreateSubprogramDIE(SP);
+ if (D)
+ // And attach the attributes
+ applySubprogramAttributesToDefinition(SP, *D);
+ }
+}
+void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
+ assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram");
+ assert(SP.isDefinition() &&
+ "CU's subprogram list contains a subprogram declaration");
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0)
+ return;
+
+ DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
+ if (!SPDIE)
+ SPDIE = getDIE(SP);
+ assert(SPDIE);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ assert(DV.isVariable());
+ DbgVariable NewVar(DV, DIExpression(nullptr), DD);
+ auto VariableDie = constructVariableDIE(NewVar);
+ applyVariableAttributes(NewVar, *VariableDie);
+ SPDIE->addChild(std::move(VariableDie));
+ }
+}
+
+void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton)
+ Asm->OutStreamer.EmitLabel(LabelBegin);
+
+ DwarfUnit::emitHeader(ASectionSym);
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+ DIScope Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ GlobalNames[FullName] = &Die;
+}
+
+/// Add a new global type to the unit.
+void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die,
+ DIScope Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty.getName().str();
+ GlobalTypes[FullName] = &Die;
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location) {
+ if (DV.variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV.isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location,
+ DV.getVariable().isIndirect());
+}
+
+/// Add an address attribute to a die based on the location provided.
+void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location,
+ bool Indirect) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+
+ if (Location.isReg() && !Indirect)
+ addRegisterOpPiece(*Loc, Location.getReg());
+ else {
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ if (Indirect && !Location.isReg()) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ }
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Start with the address based on the location provided, and generate the
+/// DWARF information necessary to find the actual variable given the extra
+/// address information encoded in the DbgVariable, starting from the starting
+/// location. Add the DWARF information to the die.
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_plus) {
+ assert(!DV.getVariable().isIndirect() &&
+ "double indirection not handled");
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
+ i = 2;
+ } else if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_deref) {
+ assert(!DV.getVariable().isIndirect() &&
+ "double indirection not handled");
+ addRegisterOpPiece(*Loc, Location.getReg(),
+ DV.getExpression().getPieceSize(),
+ DV.getExpression().getPieceOffset());
+ i = 3;
+ } else
+ addRegisterOpPiece(*Loc, Location.getReg());
+ } else
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == dwarf::DW_OP_plus) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
+
+ } else if (Element == dwarf::DW_OP_deref) {
+ if (!Location.isReg())
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ } else if (Element == dwarf::DW_OP_piece) {
+ const unsigned SizeOfByte = 8;
+ unsigned PieceOffsetInBits = DV.getAddrElement(++i) * SizeOfByte;
+ unsigned PieceSizeInBits = DV.getAddrElement(++i) * SizeOfByte;
+ // Emit DW_OP_bit_piece Size Offset.
+ assert(PieceSizeInBits > 0 && "piece has zero size");
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
+ addUInt(*Loc, dwarf::DW_FORM_udata, PieceSizeInBits);
+ addUInt(*Loc, dwarf::DW_FORM_udata, PieceOffsetInBits);
+ } else
+ llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Add a Dwarf loclistptr attribute data and value.
+void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
+ unsigned Index) {
+ DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
+ dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4;
+ Die.addValue(Attribute, Form, Value);
+}
+
+void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, Var.getVariable());
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
+/// Add a Dwarf expression attribute data and value.
+void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
+ const MCExpr *Expr) {
+ DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
+ Die.addValue((dwarf::Attribute)0, Form, Value);
+}
+
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP,
+ DIE &SPDie) {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+ applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
+ addGlobalName(SP.getName(), SPDie, Context);
+}
+
+bool DwarfCompileUnit::isDwoUnit() const {
+ return DD->useSplitDwarf() && Skeleton;
+}
+
+bool DwarfCompileUnit::includeMinimalInlineScopes() const {
+ return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly ||
+ (DD->useSplitDwarf() && !Skeleton);
+}
+} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..e521f39
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,250 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DwarfUnit.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DebugInfo.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+class DwarfFile;
+class MCSymbol;
+class LexicalScope;
+
+class DwarfCompileUnit : public DwarfUnit {
+ /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
+ /// the need to search for it in applyStmtList.
+ unsigned stmtListIndex;
+
+ /// Skeleton unit associated with this unit.
+ DwarfCompileUnit *Skeleton;
+
+ /// A label at the start of the non-dwo section related to this unit.
+ MCSymbol *SectionSym;
+
+ /// The start of the unit within its section.
+ MCSymbol *LabelBegin;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ StringMap<const DIE *> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ StringMap<const DIE *> GlobalTypes;
+
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
+ // List of ranges for a given compile unit.
+ SmallVector<RangeSpan, 2> CURanges;
+
+ // The base address of this unit, if any. Used for relative references in
+ // ranges/locs.
+ const MCSymbol *BaseAddress;
+
+ /// \brief Construct a DIE for the given DbgVariable without initializing the
+ /// DbgVariable's DIE reference.
+ std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract);
+
+ bool isDwoUnit() const override;
+
+ bool includeMinimalInlineScopes() const;
+
+public:
+ DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU);
+
+ DwarfCompileUnit *getSkeleton() const {
+ return Skeleton;
+ }
+
+ void initStmtList(MCSymbol *DwarfLineSectionSym);
+
+ /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
+ void applyStmtList(DIE &D);
+
+ /// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+ DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addLocalLabelAddress - Add a dwarf label attribute data and value using
+ /// DW_FORM_addr only.
+ void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addSectionDelta - Add a label delta attribute data and value.
+ void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ const MCSymbol *Lo);
+
+ DwarfCompileUnit &getCU() override { return *this; }
+
+ unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+
+ /// addRange - Add an address range to the list of ranges for this unit.
+ void addRange(RangeSpan Range);
+
+ void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
+
+ /// addSectionLabel - Add a Dwarf section label attribute data and value.
+ ///
+ void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec);
+
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE &updateSubprogramScopeDIE(DISubprogram SP);
+
+ void constructScopeDIE(LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren);
+
+ /// \brief A helper function to construct a RangeSpanList for a given
+ /// lexical scope.
+ void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
+
+ void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
+
+ void attachRangesOrLowHighPC(DIE &D,
+ const SmallVectorImpl<InsnRange> &Ranges);
+ /// \brief This scope represents inlined body of a function. Construct
+ /// DIE to represent this concrete inlined copy of the function.
+ std::unique_ptr<DIE> constructInlinedScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ std::unique_ptr<DIE> constructLexicalScopeDIE(LexicalScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
+ bool Abstract = false);
+
+ std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
+ const LexicalScope &Scope,
+ DIE *&ObjectPointer);
+
+ /// A helper function to create children of a Scope DIE.
+ DIE *createScopeChildrenDIE(LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+ unsigned *ChildScopeCount = nullptr);
+
+ /// \brief Construct a DIE for this subprogram scope.
+ void constructSubprogramScopeDIE(LexicalScope *Scope);
+
+ DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
+
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct import_module DIE.
+ std::unique_ptr<DIE>
+ constructImportedEntityDIE(const DIImportedEntity &Module);
+
+ void finishSubprogramDefinition(DISubprogram SP);
+
+ void collectDeadVariables(DISubprogram SP);
+
+ /// Set the skeleton unit associated with this unit.
+ void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
+
+ MCSymbol *getSectionSym() const {
+ assert(Section);
+ return SectionSym;
+ }
+
+ /// Pass in the SectionSym even though we could recreate it in every compile
+ /// unit (type units will have actually distinct symbols once they're in
+ /// comdat sections).
+ void initSection(const MCSection *Section, MCSymbol *SectionSym) {
+ DwarfUnit::initSection(Section);
+ this->SectionSym = SectionSym;
+
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton)
+ LabelBegin =
+ Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+ }
+
+ unsigned getLength() {
+ return sizeof(uint32_t) + // Length field
+ getHeaderSize() + UnitDie.getSize();
+ }
+
+ void emitHeader(const MCSymbol *ASectionSym) const override;
+
+ MCSymbol *getLabelBegin() const {
+ assert(Section);
+ return LabelBegin;
+ }
+
+ /// Add a new global name to the compile unit.
+ void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override;
+
+ /// Add a new global type to the compile unit.
+ void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override;
+
+ const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// Add DW_AT_location attribute for a DbgVariable based on provided
+ /// MachineLocation.
+ void addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location);
+ /// Add an address attribute to a die based on the location provided.
+ void addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location, bool Indirect = false);
+
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual variable (navigating the
+ /// extra location information encoded in the type) based on the starting
+ /// location. Add the DWARF information to the die.
+ void addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Add a Dwarf loclistptr attribute data and value.
+ void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+
+ /// Add a Dwarf expression attribute data and value.
+ void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+
+ void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return (Skeleton ? Skeleton : this)->CURangeLists;
+ }
+
+ /// getRanges - Get the list of ranges for this unit.
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+ SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
+
+ void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
+ const MCSymbol *getBaseAddress() const { return BaseAddress; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 77860c0..230ea46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "ByteStreamer.h"
#include "DwarfDebug.h"
+
+#include "ByteStreamer.h"
+#include "DwarfCompileUnit.h"
#include "DIE.h"
#include "DIEHash.h"
#include "DwarfUnit.h"
@@ -36,6 +38,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
@@ -47,6 +50,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
@@ -149,7 +153,7 @@ DIType DbgVariable::getType() const {
if (tag == dwarf::DW_TAG_pointer_type)
subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom());
- DIArray Elements = DICompositeType(subType).getTypeArray();
+ DIArray Elements = DICompositeType(subType).getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDerivedType DT(Elements.getElement(i));
if (getName() == DT.getName())
@@ -165,10 +169,11 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr),
- GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0),
+ InfoHolder(A, *this, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, "skel_string", DIEValueAllocator),
+ SkeletonHolder(A, *this, "skel_string", DIEValueAllocator),
+ IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -188,8 +193,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Turn on accelerator tables for Darwin by default, pubnames by
// default for non-Darwin, and handle split dwarf.
- bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
-
if (DwarfAccelTables == Default)
HasDwarfAccelTables = IsDarwin;
else
@@ -308,26 +311,6 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
return false;
}
-// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
-// and DW_AT_high_pc attributes. If there are global variables in this
-// scope then create and insert DIEs for these variables.
-DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU,
- DISubprogram SP) {
- DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP);
-
- attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym);
-
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
-
- // Add name to the name table, we do this here because we're guaranteed
- // to have concrete versions of our DW_TAG_subprogram nodes.
- addSubprogramNames(SP, *SPDie);
-
- return *SPDie;
-}
-
/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -344,271 +327,30 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
// We don't create a DIE if we have a single Range and the end label
// is null.
- SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
- MCSymbol *End = getLabelAfterInsn(RI->second);
- return !End;
-}
-
-static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D,
- dwarf::Attribute A, const MCSymbol *L,
- const MCSymbol *Sec) {
- if (Asm.MAI->doesDwarfUseRelocationsAcrossSections())
- U.addSectionLabel(D, A, L);
- else
- U.addSectionDelta(D, A, L, Sec);
-}
-
-void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
- const SmallVectorImpl<InsnRange> &Range) {
- // Emit offset in .debug_range as a relocatable label. emitDIE will handle
- // emitting it appropriately.
- MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++);
-
- // Under fission, ranges are specified by constant offsets relative to the
- // CU's DW_AT_GNU_ranges_base.
- if (useSplitDwarf())
- TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
- DwarfDebugRangeSectionSym);
- else
- addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
- DwarfDebugRangeSectionSym);
-
- RangeSpanList List(RangeSym);
- for (const InsnRange &R : Range) {
- RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second));
- List.addRange(std::move(Span));
- }
-
- // Add the range list to the set of ranges to be emitted.
- TheCU.addRangeList(std::move(List));
-}
-
-void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die,
- const SmallVectorImpl<InsnRange> &Ranges) {
- assert(!Ranges.empty());
- if (Ranges.size() == 1)
- attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first),
- getLabelAfterInsn(Ranges.front().second));
- else
- addScopeRangeList(TheCU, Die, Ranges);
-}
-
-// Construct new DW_TAG_lexical_block for this scope and attach
-// DW_AT_low_pc/DW_AT_high_pc labels.
-std::unique_ptr<DIE>
-DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- if (isLexicalScopeDIENull(Scope))
- return nullptr;
-
- auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
- if (Scope->isAbstractScope())
- return ScopeDIE;
-
- attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
-
- return ScopeDIE;
+ return !getLabelAfterInsn(Ranges.front().second);
}
-// This scope represents inlined body of a function. Construct DIE to
-// represent this concrete inlined copy of the function.
-std::unique_ptr<DIE>
-DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- assert(Scope->getScopeNode());
- DIScope DS(Scope->getScopeNode());
- DISubprogram InlinedSP = getDISubprogram(DS);
- // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
- // was inlined from another compile unit.
- DIE *OriginDIE = AbstractSPDies[InlinedSP];
- assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
-
- auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
- TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
-
- attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
-
- InlinedSubprogramDIEs.insert(OriginDIE);
-
- // Add the call site information to the DIE.
- DILocation DL(Scope->getInlinedAt());
- TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
- TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
- TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
-
- // Add name to the name table, we do this here because we're guaranteed
- // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- addSubprogramNames(InlinedSP, *ScopeDIE);
-
- return ScopeDIE;
-}
-
-static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU,
- DbgVariable &DV,
- const LexicalScope &Scope,
- DIE *&ObjectPointer) {
- auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope());
- if (DV.isObjectPointer())
- ObjectPointer = Var.get();
- return Var;
-}
-
-DIE *DwarfDebug::createScopeChildrenDIE(
- DwarfCompileUnit &TheCU, LexicalScope *Scope,
- SmallVectorImpl<std::unique_ptr<DIE>> &Children) {
- DIE *ObjectPointer = nullptr;
-
- // Collect arguments for current function.
- if (LScopes.isCurrentFunctionScope(Scope)) {
- for (DbgVariable *ArgDV : CurrentFnArguments)
- if (ArgDV)
- Children.push_back(
- constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer));
-
- // If this is a variadic function, add an unspecified parameter.
- DISubprogram SP(Scope->getScopeNode());
- DIArray FnArgs = SP.getType().getTypeArray();
- if (FnArgs.getElement(FnArgs.getNumElements() - 1)
- .isUnspecifiedParameter()) {
- Children.push_back(
- make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
- }
- }
-
- // Collect lexical scope children first.
- for (DbgVariable *DV : ScopeVariables.lookup(Scope))
- Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer));
-
- for (LexicalScope *LS : Scope->getChildren())
- if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS))
- Children.push_back(std::move(Nested));
- return ObjectPointer;
+template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) {
+ F(CU);
+ if (auto *SkelCU = CU.getSkeleton())
+ F(*SkelCU);
}
-void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU,
- LexicalScope *Scope, DIE &ScopeDIE) {
- // We create children when the scope DIE is not null.
- SmallVector<std::unique_ptr<DIE>, 8> Children;
- if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children))
- TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
-
- // Add children
- for (auto &I : Children)
- ScopeDIE.addChild(std::move(I));
-}
-
-void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
+void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
- DISubprogram SP(Scope->getScopeNode());
+ const MDNode *SP = Scope->getScopeNode();
ProcessedSPNodes.insert(SP);
- DIE *&AbsDef = AbstractSPDies[SP];
- if (AbsDef)
- return;
-
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DwarfCompileUnit &SPCU = *SPMap[SP];
- DIE *ContextDIE;
-
- // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
- // the important distinction that the DIDescriptor is not associated with the
- // DIE (since the DIDescriptor will be associated with the concrete DIE, if
- // any). It could be refactored to some common utility function.
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
- ContextDIE = &SPCU.getUnitDie();
- SPCU.getOrCreateSubprogramDIE(SPDecl);
- } else
- ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext()));
-
- // Passing null as the associated DIDescriptor because the abstract definition
- // shouldn't be found by lookup.
- AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
- DIDescriptor());
- SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
-
- SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
- createAndAddScopeChildren(SPCU, Scope, *AbsDef);
-}
-
-DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- assert(Scope && Scope->getScopeNode());
- assert(!Scope->getInlinedAt());
- assert(!Scope->isAbstractScope());
- DISubprogram Sub(Scope->getScopeNode());
-
- assert(Sub.isSubprogram());
-
- ProcessedSPNodes.insert(Sub);
-
- DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub);
-
- createAndAddScopeChildren(TheCU, Scope, ScopeDIE);
-
- return ScopeDIE;
-}
-
-// Construct a DIE for this scope.
-std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- if (!Scope || !Scope->getScopeNode())
- return nullptr;
-
- DIScope DS(Scope->getScopeNode());
-
- assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
- "Only handle inlined subprograms here, use "
- "constructSubprogramScopeDIE for non-inlined "
- "subprograms");
-
- SmallVector<std::unique_ptr<DIE>, 8> Children;
-
- // We try to create the scope DIE first, then the children DIEs. This will
- // avoid creating un-used children then removing them later when we find out
- // the scope DIE is null.
- std::unique_ptr<DIE> ScopeDIE;
- if (Scope->getParent() && DS.isSubprogram()) {
- ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
- if (!ScopeDIE)
- return nullptr;
- // We create children when the scope DIE is not null.
- createScopeChildrenDIE(TheCU, Scope, Children);
- } else {
- // Early exit when we know the scope DIE is going to be null.
- if (isLexicalScopeDIENull(Scope))
- return nullptr;
-
- // We create children here when we know the scope DIE is not going to be
- // null and the children will be added to the scope DIE.
- createScopeChildrenDIE(TheCU, Scope, Children);
-
- // There is no need to emit empty lexical block DIE.
- std::pair<ImportedEntityMap::const_iterator,
- ImportedEntityMap::const_iterator> Range =
- std::equal_range(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(),
- std::pair<const MDNode *, const MDNode *>(DS, nullptr),
- less_first());
- if (Children.empty() && Range.first == Range.second)
- return nullptr;
- ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
- assert(ScopeDIE && "Scope DIE should not be null.");
- for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
- ++i)
- constructImportedEntityDIE(TheCU, i->second, *ScopeDIE);
- }
-
- // Add children
- for (auto &I : Children)
- ScopeDIE->addChild(std::move(I));
-
- return ScopeDIE;
+ auto &CU = SPMap[SP];
+ forBothCUs(*CU, [&](DwarfCompileUnit &CU) {
+ CU.constructAbstractSubprogramScopeDIE(Scope);
+ });
}
void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
@@ -629,6 +371,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
DwarfCompileUnit &NewCU = *OwnedUnit;
DIE &Die = NewCU.getUnitDie();
InfoHolder.addUnit(std::move(OwnedUnit));
+ if (useSplitDwarf())
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
@@ -665,14 +409,10 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
- if (!FirstCU)
- FirstCU = &NewCU;
-
- if (useSplitDwarf()) {
+ if (useSplitDwarf())
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
DwarfInfoDWOSectionSym);
- NewCU.setSkeleton(constructSkeletonCU(NewCU));
- } else
+ else
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
DwarfInfoSectionSym);
@@ -681,44 +421,12 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
return NewCU;
}
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N) {
+void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const MDNode *N) {
DIImportedEntity Module(N);
assert(Module.Verify());
if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
- constructImportedEntityDIE(TheCU, Module, *D);
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N, DIE &Context) {
- DIImportedEntity Module(N);
- assert(Module.Verify());
- return constructImportedEntityDIE(TheCU, Module, Context);
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity &Module,
- DIE &Context) {
- assert(Module.Verify() &&
- "Use one of the MDNode * overloads to handle invalid metadata");
- DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module);
- DIE *EntityDie;
- DIDescriptor Entity = resolve(Module.getEntity());
- if (Entity.isNameSpace())
- EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity));
- else if (Entity.isSubprogram())
- EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity));
- else if (Entity.isType())
- EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity));
- else
- EntityDie = TheCU.getDIE(Entity);
- TheCU.addSourceLine(IMDie, Module.getLineNumber(),
- Module.getContext().getFilename(),
- Module.getContext().getDirectory());
- TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie);
- StringRef Name = Module.getName();
- if (!Name.empty())
- TheCU.addString(IMDie, dwarf::DW_AT_name, Name);
+ D->addChild(TheCU.constructImportedEntityDIE(Module));
}
// Emit all Dwarf sections that should come prior to the content. Create
@@ -732,8 +440,6 @@ void DwarfDebug::beginModule() {
FunctionDIs = makeSubprogramMap(*M);
- // If module has named metadata anchors then use them, otherwise scan the
- // module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return;
@@ -756,13 +462,18 @@ void DwarfDebug::beginModule() {
ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
+ CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
DIArray SPs = CUNode.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU.getOrCreateTypeDIE(EnumTypes.getElement(i));
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) {
+ DIType Ty(EnumTypes.getElement(i));
+ // The enum types array by design contains pointers to
+ // MDNodes rather than DIRefs. Unique them here.
+ DIType UniqueTy(resolve(Ty.getRef()));
+ CU.getOrCreateTypeDIE(UniqueTy);
+ }
DIArray RetainedTypes = CUNode.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
DIType Ty(RetainedTypes.getElement(i));
@@ -774,7 +485,7 @@ void DwarfDebug::beginModule() {
// Emit imported_modules last so that the relevant context is already
// available.
for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- constructImportedEntityDIE(CU, ImportedEntities.getElement(i));
+ constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i));
}
// Tell MMI that we have debug info.
@@ -787,9 +498,7 @@ void DwarfDebug::beginModule() {
void DwarfDebug::finishVariableDefinitions() {
for (const auto &Var : ConcreteVariables) {
DIE *VariableDie = Var->getDIE();
- // FIXME: There shouldn't be any variables without DIEs.
- if (!VariableDie)
- continue;
+ assert(VariableDie);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteVariables list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
@@ -805,36 +514,10 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- const Module *M = MMI->getModule();
-
- NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit TheCU(N);
- // Construct subprogram DIE and add variables DIEs.
- DwarfCompileUnit *SPCU =
- static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
- DIArray Subprograms = TheCU.getSubprograms();
- for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
- DISubprogram SP(Subprograms.getElement(i));
- // Perhaps the subprogram is in another CU (such as due to comdat
- // folding, etc), in which case ignore it here.
- if (SPMap[SP] != SPCU)
- continue;
- DIE *D = SPCU->getDIE(SP);
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
- if (D)
- // If this subprogram has an abstract definition, reference that
- SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
- } else {
- if (!D)
- // Lazily construct the subprogram if we didn't see either concrete or
- // inlined versions during codegen.
- D = SPCU->getOrCreateSubprogramDIE(SP);
- // And attach the attributes
- SPCU->applySubprogramAttributesToDefinition(SP, *D);
- }
- }
- }
+ for (const auto &P : SPMap)
+ forBothCUs(*P.second, [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(DISubprogram(P.first));
+ });
}
@@ -854,26 +537,7 @@ void DwarfDebug::collectDeadVariables() {
DISubprogram SP(Subprograms.getElement(i));
if (ProcessedSPNodes.count(SP) != 0)
continue;
- assert(SP.isSubprogram() &&
- "CU's subprogram list contains a non-subprogram");
- assert(SP.isDefinition() &&
- "CU's subprogram list contains a subprogram declaration");
- DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0)
- continue;
-
- DIE *SPDIE = AbstractSPDies.lookup(SP);
- if (!SPDIE)
- SPDIE = SPCU->getDIE(SP);
- assert(SPDIE);
- for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
- DIVariable DV(Variables.getElement(vi));
- assert(DV.isVariable());
- DbgVariable NewVar(DV, this);
- auto VariableDie = SPCU->constructVariableDIE(NewVar);
- SPCU->applyVariableAttributes(NewVar, *VariableDie);
- SPDIE->addChild(std::move(VariableDie));
- }
+ SPCU->collectDeadVariables(SP);
}
}
}
@@ -889,66 +553,52 @@ void DwarfDebug::finalizeModuleInfo() {
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
- for (const auto &TheU : getUnits()) {
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
- TheU->constructContainingTypeDIEs();
+ TheCU.constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
- if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) {
- // If we're splitting the dwarf out now that we've got the entire
- // CU then add the dwo id to it.
- DwarfCompileUnit *SkCU =
- static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
- if (useSplitDwarf()) {
- // Emit a unique identifier for this CU.
- uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie());
- TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
- SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
-
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty())
- addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
- dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
- DwarfAddrSectionSym);
- if (!TheU->getRangeLists().empty())
- addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
- dwarf::DW_AT_GNU_ranges_base,
- DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
- }
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then add the dwo id to it.
+ auto *SkCU = TheCU.getSkeleton();
+ if (useSplitDwarf()) {
+ // Emit a unique identifier for this CU.
+ uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
+ TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
+ DwarfAddrSectionSym, DwarfAddrSectionSym);
+ if (!SkCU->getRangeLists().empty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
+ DwarfDebugRangeSectionSym,
+ DwarfDebugRangeSectionSym);
+ }
- // If we have code split among multiple sections or non-contiguous
- // ranges of code then emit a DW_AT_ranges attribute on the unit that will
- // remain in the .o file, otherwise add a DW_AT_low_pc.
- // FIXME: We should use ranges allow reordering of code ala
- // .subsections_via_symbols in mach-o. This would mean turning on
- // ranges for all subprogram DIEs for mach-o.
- DwarfCompileUnit &U =
- SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
- unsigned NumRanges = TheU->getRanges().size();
- if (NumRanges) {
- if (NumRanges > 1) {
- addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
- Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
- DwarfDebugRangeSectionSym);
-
- // A DW_AT_low_pc attribute may also be specified in combination with
- // DW_AT_ranges to specify the default base address for use in
- // location lists (see Section 2.6.2) and range lists (see Section
- // 2.17.3).
- U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- 0);
- } else {
- RangeSpan &Range = TheU->getRanges().back();
- U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
- Range.getStart());
- U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
- Range.getStart());
- }
- }
+ // If we have code split among multiple sections or non-contiguous
+ // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+ // remain in the .o file, otherwise add a DW_AT_low_pc.
+ // FIXME: We should use ranges allow reordering of code ala
+ // .subsections_via_symbols in mach-o. This would mean turning on
+ // ranges for all subprogram DIEs for mach-o.
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ if (unsigned NumRanges = TheCU.getRanges().size()) {
+ if (NumRanges > 1)
+ // A DW_AT_low_pc attribute may also be specified in combination with
+ // DW_AT_ranges to specify the default base address for use in
+ // location lists (see Section 2.6.2) and range lists (see Section
+ // 2.17.3).
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ else
+ TheCU.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
}
@@ -1010,7 +660,10 @@ void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
- if (!FirstCU)
+ // If we aren't actually generating debug info (check beginModule -
+ // conditionalized on !DisableDebugInfoPrinting and the presence of the
+ // llvm.dbg.cu metadata node)
+ if (!DwarfInfoSectionSym)
return;
// End any existing sections.
@@ -1064,9 +717,6 @@ void DwarfDebug::endModule() {
// clean up.
SPMap.clear();
AbstractVariables.clear();
-
- // Reset these for the next Module if we have one.
- FirstCU = nullptr;
}
// Find abstract variable, if any, associated with Var.
@@ -1092,8 +742,8 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
void DwarfDebug::createAbstractVariable(const DIVariable &Var,
LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
- addScopeVariable(Scope, AbsDbgVariable.get());
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this);
+ InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
@@ -1117,55 +767,35 @@ DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
createAbstractVariable(Cleansed, Scope);
}
-// If Var is a current function argument then add it to CurrentFnArguments list.
-bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) {
- if (!LScopes.isCurrentFunctionScope(Scope))
- return false;
- DIVariable DV = Var->getVariable();
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
- return false;
- unsigned ArgNo = DV.getArgNumber();
- if (ArgNo == 0)
- return false;
-
- size_t Size = CurrentFnArguments.size();
- if (Size == 0)
- CurrentFnArguments.resize(CurFn->getFunction()->arg_size());
- // llvm::Function argument size is not good indicator of how many
- // arguments does the function have at source level.
- if (ArgNo > Size)
- CurrentFnArguments.resize(ArgNo * 2);
- CurrentFnArguments[ArgNo - 1] = Var;
- return true;
-}
-
// Collect variable information from side table maintained by MMI.
void DwarfDebug::collectVariableInfoFromMMITable(
- SmallPtrSet<const MDNode *, 16> &Processed) {
+ SmallPtrSetImpl<const MDNode *> &Processed) {
for (const auto &VI : MMI->getVariableDbgInfo()) {
if (!VI.Var)
continue;
Processed.insert(VI.Var);
- DIVariable DV(VI.Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
+ DIVariable DV(VI.Var);
+ DIExpression Expr(VI.Expr);
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
RegVar->setFrameIndex(VI.Slot);
- addScopeVariable(Scope, RegVar);
+ InfoHolder.addScopeVariable(Scope, RegVar);
}
}
// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+ const MDNode *Expr = MI->getDebugExpression();
const MDNode *Var = MI->getDebugVariable();
- assert(MI->getNumOperands() == 3);
+ assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
MachineLocation MLoc;
// If the second operand is an immediate, this is a
@@ -1174,24 +804,138 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- return DebugLocEntry::Value(Var, MLoc);
+ return DebugLocEntry::Value(Var, Expr, MLoc);
}
if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());
- llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-// Find variables for each lexical scope.
+/// Determine whether two variable pieces overlap.
+static bool piecesOverlap(DIExpression P1, DIExpression P2) {
+ if (!P1.isVariablePiece() || !P2.isVariablePiece())
+ return true;
+ unsigned l1 = P1.getPieceOffset();
+ unsigned l2 = P2.getPieceOffset();
+ unsigned r1 = l1 + P1.getPieceSize();
+ unsigned r2 = l2 + P2.getPieceSize();
+ // True where [l1,r1[ and [r1,r2[ overlap.
+ return (l1 < r2) && (l2 < r1);
+}
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. If the ranges of several independent
+/// pieces of the same variable overlap partially, split them up and
+/// combine the ranges. The resulting DebugLocEntries are will have
+/// strict monotonically increasing begin addresses and will never
+/// overlap.
+//
+// Input:
+//
+// Ranges History [var, loc, piece ofs size]
+// 0 | [x, (reg0, piece 0, 32)]
+// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// 2 | | ...
+// 3 | [clobber reg0]
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+//
+// Output:
+//
+// [0-1] [x, (reg0, piece 0, 32)]
+// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
+// [3-4] [x, (reg1, piece 32, 32)]
+// [4- ] [x, (mem, piece 0, 64)]
void
-DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges) {
+ SmallVector<DebugLocEntry::Value, 4> OpenRanges;
+
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const MachineInstr *Begin = I->first;
+ const MachineInstr *End = I->second;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if a variable is inaccessible in this range.
+ if (Begin->getNumOperands() > 1 &&
+ Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
+ OpenRanges.clear();
+ continue;
+ }
+
+ // If this piece overlaps with any open ranges, truncate them.
+ DIExpression DIExpr = Begin->getDebugExpression();
+ auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](DebugLocEntry::Value R) {
+ return piecesOverlap(DIExpr, R.getExpression());
+ });
+ OpenRanges.erase(Last, OpenRanges.end());
+
+ const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+ assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+ const MCSymbol *EndLabel;
+ if (End != nullptr)
+ EndLabel = getLabelAfterInsn(End);
+ else if (std::next(I) == Ranges.end())
+ EndLabel = FunctionEndSym;
+ else
+ EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ assert(EndLabel && "Forgot label after instruction ending a range!");
+ DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+
+ auto Value = getDebugLocValue(Begin);
+ DebugLocEntry Loc(StartLabel, EndLabel, Value);
+ bool couldMerge = false;
+
+ // If this is a piece, it may belong to the current DebugLocEntry.
+ if (DIExpr.isVariablePiece()) {
+ // Add this value to the list of open ranges.
+ OpenRanges.push_back(Value);
+
+ // Attempt to add the piece to the last entry.
+ if (!DebugLoc.empty())
+ if (DebugLoc.back().MergeValues(Loc))
+ couldMerge = true;
+ }
+
+ if (!couldMerge) {
+ // Need to add a new DebugLocEntry. Add all values from still
+ // valid non-overlapping pieces.
+ if (OpenRanges.size())
+ Loc.addValues(OpenRanges);
+
+ DebugLoc.push_back(std::move(Loc));
+ }
+
+ // Attempt to coalesce the ranges of two otherwise identical
+ // DebugLocEntries.
+ auto CurEntry = DebugLoc.rbegin();
+ auto PrevEntry = std::next(CurEntry);
+ if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
+ DebugLoc.pop_back();
+
+ DEBUG({
+ dbgs() << CurEntry->getValues().size() << " Values:\n";
+ for (auto Value : CurEntry->getValues()) {
+ Value.getVariable()->dump();
+ Value.getExpression()->dump();
+ }
+ dbgs() << "-----\n";
+ });
+ }
+}
+
+
+// Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
+ SmallPtrSetImpl<const MDNode *> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMMITable(Processed);
@@ -1206,10 +950,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
continue;
LexicalScope *Scope = nullptr;
- if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
- DISubprogram(DV.getContext()).describes(CurFn->getFunction()))
- Scope = LScopes.getCurrentFunctionScope();
- else if (MDNode *IA = DV.getInlinedAt()) {
+ if (MDNode *IA = DV.getInlinedAt()) {
DebugLoc DL = DebugLoc::getFromDILocation(IA);
Scope = LScopes.findInlinedScope(DebugLoc::get(
DL.getLine(), DL.getCol(), DV.getContext(), IA));
@@ -1225,7 +966,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
- addScopeVariable(Scope, RegVar);
+ InfoHolder.addScopeVariable(Scope, RegVar);
// Check if the first DBG_VALUE is valid for the rest of the function.
if (Ranges.size() == 1 && Ranges.front().second == nullptr)
@@ -1236,53 +977,26 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
DebugLocList &LocList = DotDebugLocEntries.back();
+ LocList.CU = &TheCU;
LocList.Label =
Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
- SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const MachineInstr *Begin = I->first;
- const MachineInstr *End = I->second;
- assert(Begin->isDebugValue() && "Invalid History entry");
-
- // Check if a variable is unaccessible in this range.
- if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
- !Begin->getOperand(0).getReg())
- continue;
- DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
- if (End != nullptr)
- DEBUG(dbgs() << "\t" << *End);
- else
- DEBUG(dbgs() << "\tNULL\n");
-
- const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
- assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
-
- const MCSymbol *EndLabel;
- if (End != nullptr)
- EndLabel = getLabelAfterInsn(End);
- else if (std::next(I) == Ranges.end())
- EndLabel = FunctionEndSym;
- else
- EndLabel = getLabelBeforeInsn(std::next(I)->first);
- assert(EndLabel && "Forgot label after instruction ending a range!");
- DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
- if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
- DebugLoc.push_back(std::move(Loc));
- }
+ // Build the location list for this variable.
+ buildLocationList(LocList.List, Ranges);
}
// Collect info for variables that were optimized out.
- DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ DIArray Variables = SP.getVariables();
for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
DIVariable DV(Variables.getElement(i));
assert(DV.isVariable());
- if (!Processed.insert(DV))
+ if (!Processed.insert(DV).second)
continue;
if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
- addScopeVariable(Scope, ConcreteVariables.back().get());
+ DIExpression NoExpr;
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this));
+ InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get());
}
}
}
@@ -1458,7 +1172,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
Asm->OutStreamer.EmitLabel(FunctionBeginSym);
// Calculate history for local variables.
- calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues);
+ calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(),
+ DbgValues);
// Request labels for the full history.
for (const auto &I : DbgValues) {
@@ -1468,10 +1183,24 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// The first mention of a function argument gets the FunctionBeginSym
// label, so arguments are visible when breaking at function entry.
- DIVariable DV(I.first);
- if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
- getDISubprogram(DV.getContext()).describes(MF->getFunction()))
+ DIVariable DIVar(Ranges.front().first->getDebugVariable());
+ if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
+ getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
+ if (Ranges.front().first->getDebugExpression().isVariablePiece()) {
+ // Mark all non-overlapping initial pieces.
+ for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
+ DIExpression Piece = I->first->getDebugExpression();
+ if (std::all_of(Ranges.begin(), I,
+ [&](DbgValueHistoryMap::InstrRange Pred) {
+ return !piecesOverlap(Piece, Pred.first->getDebugExpression());
+ }))
+ LabelsBeforeInsn[I->first] = FunctionBeginSym;
+ else
+ break;
+ }
+ }
+ }
for (const auto &Range : Ranges) {
requestLabelBeforeInsn(Range.first);
@@ -1497,56 +1226,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
}
-void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
- if (addCurrentFnArgument(Var, LS))
- return;
- SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
- DIVariable DV = Var->getVariable();
- // Variables with positive arg numbers are parameters.
- if (unsigned ArgNum = DV.getArgNumber()) {
- // Keep all parameters in order at the start of the variable list to ensure
- // function types are correct (no out-of-order parameters)
- //
- // This could be improved by only doing it for optimized builds (unoptimized
- // builds have the right order to begin with), searching from the back (this
- // would catch the unoptimized case quickly), or doing a binary search
- // rather than linear search.
- SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin();
- while (I != Vars.end()) {
- unsigned CurNum = (*I)->getVariable().getArgNumber();
- // A local (non-parameter) variable has been found, insert immediately
- // before it.
- if (CurNum == 0)
- break;
- // A later indexed parameter has been found, insert immediately before it.
- if (CurNum > ArgNum)
- break;
- ++I;
- }
- Vars.insert(I, Var);
- return;
- }
-
- Vars.push_back(Var);
-}
-
// Gather and emit post-function debug information.
void DwarfDebug::endFunction(const MachineFunction *MF) {
- // Every beginFunction(MF) call should be followed by an endFunction(MF) call,
- // though the beginFunction may not be called at all.
- // We should handle both cases.
- if (!CurFn)
- CurFn = MF;
- else
- assert(CurFn == MF);
- assert(CurFn != nullptr);
+ assert(CurFn == MF &&
+ "endFunction should be called with the same function as beginFunction");
if (!MMI->hasDebugInfo() || LScopes.empty() ||
!FunctionDIs.count(MF->getFunction())) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
- PrevSection = nullptr;
PrevCU = nullptr;
CurFn = nullptr;
return;
@@ -1560,45 +1249,63 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DISubprogram SP(FnScope->getScopeNode());
+ DwarfCompileUnit &TheCU = *SPMap.lookup(SP);
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
- collectVariableInfo(ProcessedVars);
+ collectVariableInfo(TheCU, SP, ProcessedVars);
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode());
+ // Add the range of this function to the list of ranges for the CU.
+ TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym));
+
+ // Under -gmlt, skip building the subprogram if there are no inlined
+ // subroutines inside it.
+ if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly &&
+ LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ assert(InfoHolder.getScopeVariables().empty());
+ assert(DbgValues.empty());
+ // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
+ // by a -gmlt CU. Add a test and remove this assertion.
+ assert(AbstractVariables.empty());
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
+#ifndef NDEBUG
+ size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
+#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
DISubprogram SP(AScope->getScopeNode());
- if (!SP.isSubprogram())
- continue;
+ assert(SP.isSubprogram());
// Collect info for variables that were optimized out.
DIArray Variables = SP.getVariables();
for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
DIVariable DV(Variables.getElement(i));
assert(DV && DV.isVariable());
- if (!ProcessedVars.insert(DV))
+ if (!ProcessedVars.insert(DV).second)
continue;
ensureAbstractVariableIsCreated(DV, DV.getContext());
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractVariableIsCreated inserted abstract scopes");
}
- constructAbstractSubprogramScopeDIE(TheCU, AScope);
+ constructAbstractSubprogramScopeDIE(AScope);
}
- DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope);
- if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn))
- TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
-
- // Add the range of this function to the list of ranges for the CU.
- RangeSpan Span(FunctionBeginSym, FunctionEndSym);
- TheCU.addRange(std::move(Span));
- PrevSection = Asm->getCurrentSection();
- PrevCU = &TheCU;
+ TheCU.constructSubprogramScopeDIE(FnScope);
+ if (auto *SkelCU = TheCU.getSkeleton())
+ if (!LScopes.getAbstractScopesList().empty())
+ SkelCU->constructSubprogramScopeDIE(FnScope);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
- ScopeVariables.clear();
- CurrentFnArguments.clear();
+ InfoHolder.getScopeVariables().clear();
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
@@ -1618,8 +1325,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
assert(Scope.isScope());
Fn = Scope.getFilename();
Dir = Scope.getDirectory();
- if (Scope.isLexicalBlock())
- Discriminator = DILexicalBlock(S).getDiscriminator();
+ if (Scope.isLexicalBlockFile())
+ Discriminator = DILexicalBlockFile(S).getDiscriminator();
unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
@@ -1640,9 +1347,12 @@ void DwarfDebug::emitSectionLabels() {
// Dwarf sections base addresses.
DwarfInfoSectionSym =
emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- if (useSplitDwarf())
+ if (useSplitDwarf()) {
DwarfInfoDWOSectionSym =
emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
+ DwarfTypesDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
+ }
DwarfAbbrevSectionSym =
emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
if (useSplitDwarf())
@@ -1726,7 +1436,7 @@ void DwarfDebug::emitDIE(DIE &Die) {
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- Holder.emitUnits(this, DwarfAbbrevSectionSym);
+ Holder.emitUnits(DwarfAbbrevSectionSym);
}
// Emit the abbreviation section.
@@ -1760,54 +1470,41 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-// Emit visible names into a hashed accelerator table section.
-void DwarfDebug::emitAccelNames() {
- AccelNames.FinalizeTable(Asm, "Names");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelNamesSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
+ StringRef TableName, StringRef SymName) {
+ Accel.FinalizeTable(Asm, TableName);
+ Asm->OutStreamer.SwitchSection(Section);
+ auto *SectionBegin = Asm->GetTempSymbol(SymName);
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AccelNames.Emit(Asm, SectionBegin, &InfoHolder);
+ Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym);
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
+ "Names", "names_begin");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
- AccelObjC.FinalizeTable(Asm, "ObjC");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelObjCSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelObjC.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
+ "ObjC", "objc_begin");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
- AccelNamespace.FinalizeTable(Asm, "namespac");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelNamespace,
+ Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
+ "namespac", "namespac_begin");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
-
- AccelTypes.FinalizeTable(Asm, "types");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelTypesSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelTypes.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
+ "types", "types_begin");
}
// Public name handling.
@@ -1874,12 +1571,13 @@ void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection();
- emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames);
+ emitDebugPubSection(GnuStyle, PSec, "Names",
+ &DwarfCompileUnit::getGlobalNames);
}
void DwarfDebug::emitDebugPubSection(
bool GnuStyle, const MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) {
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
@@ -1888,7 +1586,7 @@ void DwarfDebug::emitDebugPubSection(
if (Globals.empty())
continue;
- if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton()))
+ if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
unsigned ID = TheU->getUniqueID();
@@ -1910,7 +1608,7 @@ void DwarfDebug::emitDebugPubSection(
Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4);
+ Asm->EmitInt32(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
@@ -1943,7 +1641,8 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection();
- emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes);
+ emitDebugPubSection(GnuStyle, PSec, "Types",
+ &DwarfCompileUnit::getGlobalTypes);
}
// Emit visible names into a debug str section.
@@ -1952,12 +1651,67 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
+/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
+void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
+ const DITypeIdentifierMap &Map,
+ ArrayRef<DebugLocEntry::Value> Values) {
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isVariablePiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ DIExpression Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr.getPieceOffset();
+ unsigned PieceSize = Expr.getPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*8);
+ Offset += PieceOffset-Offset;
+ }
+
+ Offset += PieceSize;
+
+ const unsigned SizeOfByte = 8;
+#ifndef NDEBUG
+ DIVariable Var = Piece.getVariable();
+ assert(!Var.isIndirect() && "indirect address for piece");
+ unsigned VarSize = Var.getSizeInBits(Map);
+ assert(PieceSize+PieceOffset <= VarSize/SizeOfByte
+ && "piece is larger than or outside of variable");
+ assert(PieceSize*SizeOfByte != VarSize
+ && "piece covers entire variable");
+#endif
+ if (Piece.isLocation() && Piece.getLoc().isReg())
+ Asm->EmitDwarfRegOpPiece(Streamer,
+ Piece.getLoc(),
+ PieceSize*SizeOfByte);
+ else {
+ emitDebugLocValue(Streamer, Piece);
+ Asm->EmitDwarfOpPiece(Streamer, PieceSize*SizeOfByte);
+ }
+ }
+}
+
+
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
- assert(Entry.getValues().size() == 1 &&
- "multi-value entries are not supported yet.");
const DebugLocEntry::Value Value = Entry.getValues()[0];
- DIVariable DV(Value.getVariable());
+ if (Value.isVariablePiece())
+ // Emit all pieces that belong to the same variable and range.
+ return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
+
+ assert(Entry.getValues().size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(Streamer, Value);
+}
+
+void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value) {
+ DIVariable DV = Value.getVariable();
+ // Regular entry.
if (Value.isInt()) {
DIBasicType BTy(resolve(DV.getType()));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
@@ -1970,24 +1724,25 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
}
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
- if (!DV.hasComplexAddress())
+ DIExpression Expr = Value.getExpression();
+ if (!Expr)
// Regular entry.
Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
else {
// Complex address entry.
- unsigned N = DV.getNumAddrElements();
+ unsigned N = Expr.getNumElements();
unsigned i = 0;
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (N >= 2 && Expr.getElement(0) == dwarf::DW_OP_plus) {
if (Loc.getOffset()) {
i = 2;
Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitSLEB128(DV.getAddrElement(1));
+ Streamer.EmitSLEB128(Expr.getElement(1));
} else {
// If first address element is OpPlus then emit
// DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1));
+ MachineLocation TLoc(Loc.getReg(), Expr.getElement(1));
Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect());
i = 2;
}
@@ -1997,13 +1752,16 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
// Emit remaining complex address elements.
for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
+ uint64_t Element = Expr.getElement(i);
+ if (Element == dwarf::DW_OP_plus) {
Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref) {
+ Streamer.EmitULEB128(Expr.getElement(++i));
+ } else if (Element == dwarf::DW_OP_deref) {
if (!Loc.isReg())
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+ } else if (Element == dwarf::DW_OP_piece) {
+ i += 3;
+ // handled in emitDebugLocEntry.
} else
llvm_unreachable("unknown Opcode found in complex address");
}
@@ -2035,14 +1793,12 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getDataLayout().getPointerSize();
for (const auto &DebugLoc : DotDebugLocEntries) {
Asm->OutStreamer.EmitLabel(DebugLoc.Label);
+ const DwarfCompileUnit *CU = DebugLoc.CU;
for (const auto &Entry : DebugLoc.List) {
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
- const DwarfCompileUnit *CU = Entry.getCU();
- if (CU->getRanges().size() == 1) {
- // Grab the begin symbol from the first range as our base.
- const MCSymbol *Base = CU->getRanges()[0].getStart();
+ if (auto *Base = CU->getBaseAddress()) {
Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size);
Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size);
} else {
@@ -2172,6 +1928,10 @@ void DwarfDebug::emitDebugARanges() {
for (DwarfCompileUnit *CU : CUs) {
std::vector<ArangeSpan> &List = Spans[CU];
+ // Describe the skeleton CU's offset and length, not the dwo file's.
+ if (auto *Skel = CU->getSkeleton())
+ CU = Skel;
+
// Emit size of content not including length itself.
unsigned ContentSize =
sizeof(int16_t) + // DWARF ARange version number
@@ -2194,7 +1954,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer.AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym());
+ Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym());
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer.AddComment("Segment Size (in bytes)");
@@ -2238,6 +1998,9 @@ void DwarfDebug::emitDebugRanges() {
for (const auto &I : CUMap) {
DwarfCompileUnit *TheCU = I.second;
+ if (auto *Skel = TheCU->getSkeleton())
+ TheCU = Skel;
+
// Iterate over the misc ranges for the compile units in the module.
for (const RangeSpanList &List : TheCU->getRangeLists()) {
// Emit our symbol so we can find the beginning of the range.
@@ -2248,9 +2011,7 @@ void DwarfDebug::emitDebugRanges() {
const MCSymbol *End = Range.getEnd();
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
- if (TheCU->getRanges().size() == 1) {
- // Grab the begin symbol from the first range as our base.
- const MCSymbol *Base = TheCU->getRanges()[0].getStart();
+ if (auto *Base = TheCU->getBaseAddress()) {
Asm->EmitLabelDifference(Begin, Base, Size);
Asm->EmitLabelDifference(End, Base, Size);
} else {
@@ -2263,23 +2024,6 @@ void DwarfDebug::emitDebugRanges() {
Asm->OutStreamer.EmitIntValue(0, Size);
Asm->OutStreamer.EmitIntValue(0, Size);
}
-
- // Now emit a range for the CU itself.
- if (TheCU->getRanges().size() > 1) {
- Asm->OutStreamer.EmitLabel(
- Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID()));
- for (const RangeSpan &Range : TheCU->getRanges()) {
- const MCSymbol *Begin = Range.getStart();
- const MCSymbol *End = Range.getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- Asm->OutStreamer.EmitSymbolValue(Begin, Size);
- Asm->OutStreamer.EmitSymbolValue(End, Size);
- }
- // And terminate the list with two 0 values.
- Asm->OutStreamer.EmitIntValue(0, Size);
- Asm->OutStreamer.EmitIntValue(0, Size);
- }
}
}
@@ -2287,11 +2031,11 @@ void DwarfDebug::emitDebugRanges() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfUnit> NewU) {
- NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode().getSplitDebugFilename());
+ NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ U.getCUNode().getSplitDebugFilename());
if (!CompilationDir.empty())
- NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(*NewU, Die);
@@ -2316,31 +2060,13 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
return NewCU;
}
-// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name,
-// DW_AT_addr_base.
-DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) {
- DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>(
- *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]);
-
- auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this,
- &SkeletonHolder);
- DwarfTypeUnit &NewTU = *OwnedUnit;
- NewTU.setTypeSignature(TU.getTypeSignature());
- NewTU.setType(nullptr);
- NewTU.initSection(
- Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature()));
-
- initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit));
- return NewTU;
-}
-
// Emit the .debug_info.dwo section for separated dwarf. This contains the
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't pass an abbrev symbol, using a constant zero instead so as not to
// emit relocations into the dwo file.
- InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr);
+ InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2364,9 +2090,8 @@ void DwarfDebug::emitDebugStrDWO() {
assert(useSplitDwarf() && "No split dwarf?");
const MCSection *OffSec =
Asm->getObjFileLowering().getDwarfStrOffDWOSection();
- const MCSymbol *StrSym = DwarfStrSectionSym;
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
- OffSec, StrSym);
+ OffSec);
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
@@ -2406,9 +2131,9 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit =
- make_unique<DwarfTypeUnit>(InfoHolder.getUnits().size(), CU, Asm, this,
- &InfoHolder, getDwoLineTable(CU));
+ auto OwnedUnit = make_unique<DwarfTypeUnit>(
+ InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm,
+ this, &InfoHolder, getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TU = &NewTU;
@@ -2421,15 +2146,13 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
- if (!useSplitDwarf())
+ if (useSplitDwarf())
+ NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
+ else {
CU.applyStmtList(UnitDie);
-
- // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools
- // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it.
- NewTU.initSection(
- useSplitDwarf()
- ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature)
- : Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ NewTU.initSection(
+ Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ }
NewTU.setType(NewTU.createTypeDIE(CTy));
@@ -2457,29 +2180,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
- for (auto &TU : TypeUnitsToAdd) {
- if (useSplitDwarf())
- TU.first->setSkeleton(constructSkeletonTU(*TU.first));
+ for (auto &TU : TypeUnitsToAdd)
InfoHolder.addUnit(std::move(TU.first));
- }
}
CU.addDIETypeSignature(RefDie, NewTU);
}
-void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D,
- MCSymbol *Begin, MCSymbol *End) {
- assert(Begin && "Begin label should not be null!");
- assert(End && "End label should not be null!");
- assert(Begin->isDefined() && "Invalid starting label");
- assert(End->isDefined() && "Invalid end label");
-
- Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
- if (DwarfVersion < 4)
- Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End);
- else
- Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
-}
-
// Accelerator table mutators - add each name along with its companion
// DIE to the proper table while ensuring that the name that we're going
// to reference is in the string table. We do this since the names we
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ffe4843..48c2809 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
-#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "DwarfFile.h"
#include "AsmPrinterHandler.h"
@@ -70,6 +70,7 @@ public:
/// \brief This class is used to track local variable information.
class DbgVariable {
DIVariable Var; // Variable Descriptor.
+ DIExpression Expr; // Complex address location expression.
DIE *TheDIE; // Variable DIE.
unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
@@ -78,18 +79,22 @@ class DbgVariable {
public:
/// Construct a DbgVariable from a DIVariable.
- DbgVariable(DIVariable V, DwarfDebug *DD)
- : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
- FrameIndex(~0), DD(DD) {}
+ DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD)
+ : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U),
+ MInsn(nullptr), FrameIndex(~0), DD(DD) {
+ assert(Var.Verify() && Expr.Verify());
+ }
/// Construct a DbgVariable from a DEBUG_VALUE.
/// AbstractVar may be NULL.
DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
- : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
- DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}
+ : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()),
+ TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue),
+ FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
+ DIExpression getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
@@ -124,14 +129,14 @@ public:
bool variableHasComplexAddress() const {
assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.hasComplexAddress();
+ return Expr.getNumElements() > 0;
}
bool isBlockByrefVariable() const;
unsigned getNumAddrElements() const {
assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.getNumAddrElements();
+ return Expr.getNumElements();
}
- uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); }
+ uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); }
DIType getType() const;
private:
@@ -159,26 +164,15 @@ class DwarfDebug : public AsmPrinterHandler {
// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- // Handle to the compile unit used for the inline extension handling,
- // this is just so that the DIEValue allocator has a place to store
- // the particular elements.
- // FIXME: Store these off of DwarfDebug instead?
- DwarfCompileUnit *FirstCU;
-
// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
// Maps subprogram MDNode with its corresponding DwarfCompileUnit.
- DenseMap<const MDNode *, DwarfCompileUnit *> SPMap;
+ MapVector<const MDNode *, DwarfCompileUnit *> SPMap;
// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
- /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
- /// be shared across CUs, that is why we keep the map here instead
- /// of in DwarfCompileUnit.
- DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
-
// List of all labels used in aranges generation.
std::vector<SymbolCU> ArangeLabels;
@@ -189,19 +183,8 @@ class DwarfDebug : public AsmPrinterHandler {
typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
SectionMapType SectionMap;
- // List of arguments for current function.
- SmallVector<DbgVariable *, 8> CurrentFnArguments;
-
LexicalScopes LScopes;
- // Collection of abstract subprogram DIEs.
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
-
- // Collection of dbg variables of a scope.
- typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >
- ScopeVariablesMap;
- ScopeVariablesMap ScopeVariables;
-
// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
@@ -210,10 +193,6 @@ class DwarfDebug : public AsmPrinterHandler {
// can refer to them in spite of insertions into this list.
SmallVector<DebugLocList, 4> DotDebugLocEntries;
- // Collection of subprogram DIEs that are marked (at the end of the module)
- // as DW_AT_inline.
- SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
-
// This is a collection of subprogram MDNodes that are processed to
// create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
@@ -243,10 +222,6 @@ class DwarfDebug : public AsmPrinterHandler {
// If nonnull, stores the current machine instruction we're processing.
const MachineInstr *CurMI;
- // If nonnull, stores the section that the previous function was allocated to
- // emitting.
- const MCSection *PrevSection;
-
// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
@@ -258,6 +233,7 @@ class DwarfDebug : public AsmPrinterHandler {
MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
+ MCSymbol *DwarfTypesDWOSectionSym;
MCSymbol *DwarfStrDWOSectionSym;
MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
@@ -322,6 +298,7 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
+ bool IsDarwin;
AddressPool AddrPool;
@@ -334,8 +311,6 @@ class DwarfDebug : public AsmPrinterHandler {
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
- void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
-
const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() {
return InfoHolder.getUnits();
}
@@ -350,45 +325,8 @@ class DwarfDebug : public AsmPrinterHandler {
void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
const MDNode *Scope);
- /// \brief Find DIE for the given subprogram and attach appropriate
- /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
- /// variables in this scope then create and insert DIEs for these
- /// variables.
- DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP);
-
- /// \brief A helper function to check whether the DIE for a given Scope is
- /// going to be null.
- bool isLexicalScopeDIENull(LexicalScope *Scope);
-
- /// \brief A helper function to construct a RangeSpanList for a given
- /// lexical scope.
- void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
- const SmallVectorImpl<InsnRange> &Range);
-
- /// \brief Construct new DW_TAG_lexical_block for this scope and
- /// attach DW_AT_low_pc/DW_AT_high_pc labels.
- std::unique_ptr<DIE> constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
-
- /// \brief This scope represents inlined body of a function. Construct
- /// DIE to represent this concrete inlined copy of the function.
- std::unique_ptr<DIE> constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
-
- /// \brief Construct a DIE for this scope.
- std::unique_ptr<DIE> constructScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope,
- DIE &ScopeDIE);
/// \brief Construct a DIE for this abstract scope.
- void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- /// \brief Construct a DIE for this subprogram scope.
- DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- /// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope,
- SmallVectorImpl<std::unique_ptr<DIE>> &Children);
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
/// \brief Emit initial Dwarf sections with a label at the start of each one.
void emitSectionLabels();
@@ -424,6 +362,10 @@ class DwarfDebug : public AsmPrinterHandler {
/// the line matrix.
void emitEndOfLineMatrix(unsigned SectionEnd);
+ /// \brief Emit a specified accelerator table.
+ void emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
+ StringRef TableName, StringRef SymName);
+
/// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -449,10 +391,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// index.
void emitDebugPubTypes(bool GnuStyle = false);
- void
- emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfUnit::*Accessor)()
- const);
+ void emitDebugPubSection(
+ bool GnuStyle, const MCSection *PSec, StringRef Name,
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
/// \brief Emit visible names into a debug str section.
void emitDebugStr();
@@ -507,15 +448,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit);
/// \brief Construct imported_module or imported_declaration DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N);
-
- /// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N,
- DIE &Context);
-
- /// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity &Module, DIE &Context);
+ void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const MDNode *N);
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
@@ -527,38 +461,29 @@ class DwarfDebug : public AsmPrinterHandler {
/// ending of a scope.
void identifyScopeMarkers();
- /// \brief If Var is an current function argument that add it in
- /// CurrentFnArguments list.
- bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope);
-
/// \brief Populate LexicalScope entries with variables' info.
- void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+ void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
+ SmallPtrSetImpl<const MDNode *> &ProcessedVars);
+
+ /// \brief Build the location list for all DBG_VALUEs in the
+ /// function that describe the same variable.
+ void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges);
/// \brief Collect variable information from the side table maintained
/// by MMI.
- void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P);
+ void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P);
/// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
}
- /// \brief Return Label preceding the instruction.
- MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
-
/// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
- /// \brief Return Label immediately following the instruction.
- MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
-
- void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D,
- const SmallVectorImpl<InsnRange> &Ranges);
- void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin,
- MCSymbol *End);
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -567,13 +492,6 @@ public:
~DwarfDebug() override;
- void insertDIE(const MDNode *TypeMD, DIE *Die) {
- MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
- }
- DIE *getDIE(const MDNode *TypeMD) {
- return MDTypeNodeToDieMap.lookup(TypeMD);
- }
-
/// \brief Emit all Dwarf sections that should come prior to the
/// content.
void beginModule();
@@ -626,11 +544,15 @@ public:
/// Returns the section symbol for the .debug_loc section.
MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
- /// Returns the previous section that was emitted into.
- const MCSection *getPrevSection() const { return PrevSection; }
+ /// Returns the section symbol for the .debug_str section.
+ MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; }
+
+ /// Returns the section symbol for the .debug_ranges section.
+ MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; }
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
+ void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
/// Returns the entries for the .debug_loc section.
const SmallVectorImpl<DebugLocList> &
@@ -641,6 +563,13 @@ public:
/// \brief Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
+ /// \brief emit a single value for the debug loc section.
+ void emitDebugLocValue(ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value);
+ /// Emits an optimal (=sorted) sequence of DW_OP_pieces.
+ void emitLocPieces(ByteStreamer &Streamer,
+ const DITypeIdentifierMap &Map,
+ ArrayRef<DebugLocEntry::Value> Values);
/// Emit the location for a debug loc entry, including the size header.
void emitDebugLocEntryLocation(const DebugLocEntry &Entry);
@@ -674,6 +603,40 @@ public:
void addAccelNamespace(StringRef Name, const DIE &Die);
void addAccelType(StringRef Name, const DIE &Die, char Flags);
+
+ const MachineFunction *getCurrentFunction() const { return CurFn; }
+ const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; }
+ const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; }
+
+ iterator_range<ImportedEntityMap::const_iterator>
+ findImportedEntitiesForScope(const MDNode *Scope) const {
+ return make_range(std::equal_range(
+ ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+ std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
+ less_first()));
+ }
+
+ /// \brief A helper function to check whether the DIE for a given Scope is
+ /// going to be null.
+ bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ /// \brief Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// \brief Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+ // FIXME: Consider rolling ranges up into DwarfDebug since we use a single
+ // range_base anyway, so there's no need to keep them as separate per-CU range
+ // lists. (though one day we might end up with a range.dwo section, in which
+ // case it'd go to DwarfFile)
+ unsigned getNextRangeNumber() { return GlobalRangeCount++; }
+
+ // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
+
+ SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
+ return ProcessedSPNodes;
+ }
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 0440fce..e8867c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
-#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -81,39 +81,6 @@ public:
/// endFunction - Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-
-class Win64Exception : public EHStreamer {
- /// shouldEmitPersonality - Per-function flag to indicate if personality
- /// info should be emitted.
- bool shouldEmitPersonality;
-
- /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
- /// should be emitted.
- bool shouldEmitLSDA;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
-public:
- //===--------------------------------------------------------------------===//
- // Main entry points.
- //
- Win64Exception(AsmPrinter *A);
- virtual ~Win64Exception();
-
- /// endModule - Emit all exception information that should come after the
- /// content.
- void endModule() override;
-
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// endFunction - Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-};
-
} // End of namespace llvm
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 737ee54..50180ea 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -18,8 +18,9 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
- : Asm(AP), StrPool(DA, *Asm, Pref) {}
+DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
+ BumpPtrAllocator &DA)
+ : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {}
DwarfFile::~DwarfFile() {}
@@ -48,25 +49,18 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
-void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
+void DwarfFile::emitUnits(const MCSymbol *ASectionSym) {
for (const auto &TheU : CUs) {
DIE &Die = TheU->getUnitDie();
const MCSection *USection = TheU->getSection();
Asm->OutStreamer.SwitchSection(USection);
- // Emit the compile units header.
- Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
-
- // Emit size of content not including length itself
- Asm->OutStreamer.AddComment("Length of Unit");
- Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize());
-
TheU->emitHeader(ASectionSym);
- DD->emitDIE(Die);
- Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
+ DD.emitDIE(Die);
}
}
+
// Compute the size and offset for each DIE.
void DwarfFile::computeSizeAndOffsets() {
// Offset from the first CU in the debug info section is 0 initially.
@@ -149,8 +143,44 @@ void DwarfFile::emitAbbrevs(const MCSection *Section) {
// Emit strings into a string section.
void DwarfFile::emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection,
- const MCSymbol *StrSecSym) {
- StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym);
+ const MCSection *OffsetSection) {
+ StrPool.emit(*Asm, StrSection, OffsetSection);
+}
+
+void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
+ DIVariable DV = Var->getVariable();
+ // Variables with positive arg numbers are parameters.
+ if (unsigned ArgNum = DV.getArgNumber()) {
+ // Keep all parameters in order at the start of the variable list to ensure
+ // function types are correct (no out-of-order parameters)
+ //
+ // This could be improved by only doing it for optimized builds (unoptimized
+ // builds have the right order to begin with), searching from the back (this
+ // would catch the unoptimized case quickly), or doing a binary search
+ // rather than linear search.
+ auto I = Vars.begin();
+ while (I != Vars.end()) {
+ unsigned CurNum = (*I)->getVariable().getArgNumber();
+ // A local (non-parameter) variable has been found, insert immediately
+ // before it.
+ if (CurNum == 0)
+ break;
+ // A later indexed parameter has been found, insert immediately before it.
+ if (CurNum > ArgNum)
+ break;
+ // FIXME: There are still some cases where two inlined functions are
+ // conflated together (two calls to the same function at the same
+ // location (eg: via a macro, or without column info, etc)) and then
+ // their arguments are conflated as well.
+ assert((LS->getParent() || CurNum != ArgNum) &&
+ "Duplicate argument for top level (non-inlined) function");
+ ++I;
+ }
+ Vars.insert(I, Var);
+ return;
+ }
+
+ Vars.push_back(Var);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 3985eb2..9d64bfc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__
-#define CODEGEN_ASMPRINTER_DWARFFILE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -24,10 +24,13 @@
namespace llvm {
class AsmPrinter;
+class DbgVariable;
class DwarfUnit;
class DIEAbbrev;
class MCSymbol;
class DIE;
+class DISubprogram;
+class LexicalScope;
class StringRef;
class DwarfDebug;
class MCSection;
@@ -35,6 +38,8 @@ class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
+ DwarfDebug &DD;
+
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -46,8 +51,20 @@ class DwarfFile {
DwarfStringPool StrPool;
+ // Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
+ /// be shared across CUs, that is why we keep the map here instead
+ /// of in DwarfCompileUnit.
+ DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+
public:
- DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+ DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
+ BumpPtrAllocator &DA);
~DwarfFile();
@@ -67,18 +84,34 @@ public:
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
- void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+ void emitUnits(const MCSymbol *ASectionSym);
/// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *);
/// \brief Emit all of the strings to the section given.
void emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr,
- const MCSymbol *StrSecSym = nullptr);
+ const MCSection *OffsetSection = nullptr);
/// \brief Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; }
+
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
+ return ScopeVariables;
+ }
+
+ DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ return AbstractSPDies;
+ }
+
+ void insertDIE(const MDNode *TypeMD, DIE *Die) {
+ MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ }
+ DIE *getDIE(const MDNode *TypeMD) {
+ return MDTypeNodeToDieMap.lookup(TypeMD);
+ }
};
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 72cab60..d76b66c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -12,14 +12,11 @@
using namespace llvm;
-MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; }
-
static std::pair<MCSymbol *, unsigned> &
getEntry(AsmPrinter &Asm,
StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool,
StringRef Prefix, StringRef Str) {
- std::pair<MCSymbol *, unsigned> &Entry =
- Pool.GetOrCreateValue(Str).getValue();
+ std::pair<MCSymbol *, unsigned> &Entry = Pool[Str];
if (!Entry.first) {
Entry.second = Pool.size() - 1;
Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
@@ -36,8 +33,7 @@ unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) {
}
void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection,
- const MCSymbol *StrSecSym) {
+ const MCSection *OffsetSection) {
if (Pool.empty())
return;
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index c1615fb..ab32c1b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__
-#define CODEGEN_ASMPRINTER_STRINGPOOL_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -28,18 +28,13 @@ class StringRef;
class DwarfStringPool {
StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool;
StringRef Prefix;
- MCSymbol *SectionSymbol;
public:
DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix)
- : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {}
+ : Pool(A), Prefix(Prefix) {}
void emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr,
- const MCSymbol *StrSecSym = nullptr);
-
- /// \brief Returns the entry into the start of the pool.
- MCSymbol *getSectionSymbol();
+ const MCSection *OffsetSection = nullptr);
/// \brief Returns an entry into the string pool with the given
/// string text.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 9538bee..919d9d2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
+
#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Constants.h"
@@ -30,6 +32,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -44,20 +47,12 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
: UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
- DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr),
- Skeleton(nullptr) {
+ DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
assert(UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_type_unit);
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
-DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
- AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU)
- : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) {
- insertDIE(Node, &getUnitDie());
-}
-
DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
@@ -146,7 +141,7 @@ static bool isShareableAcrossCUs(DIDescriptor D) {
/// will be kept in DwarfDebug for shareable DIEs.
DIE *DwarfUnit::getDIE(DIDescriptor D) const {
if (isShareableAcrossCUs(D))
- return DD->getDIE(D);
+ return DU->getDIE(D);
return MDNodeToDieMap.lookup(D);
}
@@ -155,7 +150,7 @@ DIE *DwarfUnit::getDIE(DIDescriptor D) const {
/// will be kept in DwarfDebug for shareable DIEs.
void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
if (isShareableAcrossCUs(Desc)) {
- DD->insertDIE(Desc, D);
+ DU->insertDIE(Desc, D);
return;
}
MDNodeToDieMap.insert(std::make_pair(Desc, D));
@@ -206,10 +201,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
/// table.
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
-
- if (!DD->useSplitDwarf())
+ if (!isDwoUnit())
return addLocalString(Die, Attribute, String);
+ addIndexedString(Die, Attribute, String);
+}
+
+void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
+ StringRef String) {
unsigned idx = DU->getStringPool().getIndex(*Asm, String);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
@@ -224,31 +223,12 @@ void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
DIEValue *Value;
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
Value = new (DIEValueAllocator) DIELabel(Symb);
- else {
- MCSymbol *StringPool = DU->getStringPool().getSectionSymbol();
- Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
- }
+ else
+ Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
}
-/// addExpr - Add a Dwarf expression attribute data and value.
-///
-void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) {
- DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
- Die.addValue((dwarf::Attribute)0, Form, Value);
-}
-
-/// addLocationList - Add a Dwarf loclistptr attribute data and value.
-///
-void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
- unsigned Index) {
- DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
- dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4;
- Die.addValue(Attribute, Form, Value);
-}
-
/// addLabel - Add a Dwarf label attribute data and value.
///
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
@@ -261,16 +241,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
-/// addSectionLabel - Add a Dwarf section label attribute data and value.
-///
-void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label) {
- if (DD->getDwarfVersion() >= 4)
- addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label);
- else
- addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label);
-}
-
/// addSectionOffset - Add an offset into a section attribute data and value.
///
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
@@ -281,45 +251,6 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
}
-/// addLabelAddress - Add a dwarf label attribute data and value using
-/// DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
-void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label) {
-
- if (!DD->useSplitDwarf())
- return addLocalLabelAddress(Die, Attribute, Label);
-
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- unsigned idx = DD->getAddressPool().getIndex(Label);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
-}
-
-void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
- dwarf::Attribute Attribute,
- const MCSymbol *Label) {
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- Die.addValue(Attribute, dwarf::DW_FORM_addr,
- Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
- : new (DIEValueAllocator) DIEInteger(0));
-}
-
-unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
- // If we print assembly, we can't separate .file entries according to
- // compile units. Thus all files will belong to the default compile unit.
-
- // FIXME: add a better feature test than hasRawTextSupport. Even better,
- // extend .file to support this.
- return Asm->OutStreamer.EmitDwarfFileDirective(
- 0, DirName, FileName,
- Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
-}
-
unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
return SplitLineTable ? SplitLineTable->getFile(DirName, FileName)
: getCU().getOrCreateSourceID(FileName, DirName);
@@ -339,16 +270,6 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
}
}
-/// addSectionDelta - Add a section label delta attribute data and value.
-///
-void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
- Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Value);
-}
-
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
@@ -477,22 +398,12 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
}
-/// addVariableAddress - Add DW_AT_location attribute for a
-/// DbgVariable based on provided MachineLocation.
-void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
- MachineLocation Location) {
- if (DV.variableHasComplexAddress())
- addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
- else if (DV.isBlockByrefVariable())
- addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
- else
- addAddress(Die, dwarf::DW_AT_location, Location,
- DV.getVariable().isIndirect());
-}
-
/// addRegisterOp - Add register operand.
-void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+// FIXME: Ideally, this would share the implementation with
+// AsmPrinter::EmitDwarfRegOpPiece.
+void DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits, unsigned OffsetInBits) {
+ const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
int DWReg = RI->getDwarfRegNum(Reg, false);
bool isSubRegister = DWReg < 0;
@@ -511,7 +422,7 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
return;
}
- // Emit register
+ // Emit register.
if (DWReg < 32)
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
else {
@@ -519,18 +430,34 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
}
- // Emit Mask
- if (isSubRegister) {
- unsigned Size = RI->getSubRegIdxSize(Idx);
- unsigned Offset = RI->getSubRegIdxOffset(Idx);
- if (Offset > 0) {
+ // Emit mask.
+ bool isPiece = SizeInBits > 0;
+ if (isSubRegister || isPiece) {
+ const unsigned SizeOfByte = 8;
+ unsigned RegSizeInBits = RI->getSubRegIdxSize(Idx);
+ unsigned RegOffsetInBits = RI->getSubRegIdxOffset(Idx);
+ unsigned PieceSizeInBits = std::max(SizeInBits, RegSizeInBits);
+ unsigned PieceOffsetInBits = OffsetInBits ? OffsetInBits : RegOffsetInBits;
+ assert(RegSizeInBits >= SizeInBits && "register smaller than value");
+
+ if (RegOffsetInBits != PieceOffsetInBits) {
+ // Manually shift the value into place, since the DW_OP_piece
+ // describes the part of the variable, not the position of the
+ // subregister.
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(TheDie, dwarf::DW_FORM_data1, RegOffsetInBits);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_shr);
+ }
+
+ if (PieceOffsetInBits > 0 || PieceSizeInBits % SizeOfByte) {
+ assert(PieceSizeInBits > 0 && "piece has zero size");
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, Size);
- addUInt(TheDie, dwarf::DW_FORM_data1, Offset);
- } else {
- unsigned ByteSize = Size / 8; // Assuming 8 bits per byte.
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits);
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceOffsetInBits);
+ } else {
+ assert(PieceSizeInBits > 0 && "piece has zero size");
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize);
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits/SizeOfByte);
}
}
}
@@ -538,9 +465,9 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
/// addRegisterOffset - Add register offset.
void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
unsigned DWReg = RI->getDwarfRegNum(Reg, false);
- const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
if (Reg == TRI->getFrameRegister(*Asm->MF))
// If variable offset is based in frame register then use fbreg.
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
@@ -553,63 +480,6 @@ void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
addSInt(TheDie, dwarf::DW_FORM_sdata, Offset);
}
-/// addAddress - Add an address attribute to a die based on the location
-/// provided.
-void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
-
- if (Location.isReg() && !Indirect)
- addRegisterOp(*Loc, Location.getReg());
- else {
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
- if (Indirect && !Location.isReg()) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- }
- }
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
-}
-
-/// addComplexAddress - Start with the address based on the location provided,
-/// and generate the DWARF information necessary to find the actual variable
-/// given the extra address information encoded in the DbgVariable, starting
-/// from the starting location. Add the DWARF information to the die.
-///
-void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- unsigned N = DV.getNumAddrElements();
- unsigned i = 0;
- if (Location.isReg()) {
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
- i = 2;
- } else
- addRegisterOp(*Loc, Location.getReg());
- } else
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
-
- for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref) {
- if (!Location.isReg())
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- } else
- llvm_unreachable("unknown DIBuilder Opcode");
- }
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
-}
-
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
gives the variable VarName either the struct, or a pointer to the struct, as
@@ -690,7 +560,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DIArray Fields = blockStruct.getTypeArray();
+ DIArray Fields = blockStruct.getElements();
DIDerivedType varField;
DIDerivedType forwardingField;
@@ -712,7 +582,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
if (Location.isReg())
- addRegisterOp(*Loc, Location.getReg());
+ addRegisterOpPiece(*Loc, Location.getReg());
else
addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
@@ -1002,11 +872,9 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
DD->addAccelType(Ty.getName(), TyDIE, Flags);
- if ((!Context || Context.isCompileUnit() || Context.isFile() ||
- Context.isNameSpace()) &&
- getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly)
- GlobalTypes[getParentContextString(Context) + Ty.getName().str()] =
- &TyDIE;
+ if (!Context || Context.isCompileUnit() || Context.isFile() ||
+ Context.isNameSpace())
+ addGlobalType(Ty, TyDIE, Context);
}
}
@@ -1031,14 +899,6 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
addDIEEntry(Entity, Attribute, Entry);
}
-/// addGlobalName - Add a new global name to the compile unit.
-void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) {
- if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly)
- return;
- std::string FullName = getParentContextString(Context) + Name.str();
- GlobalNames[FullName] = &Die;
-}
-
/// getParentContextString - Walks the metadata parent chain in a language
/// specific manner (using the compile unit language) and returns
/// it as a string. This is done at the metadata level because DIEs may
@@ -1129,16 +989,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
}
/// constructSubprogramArguments - Construct function argument DIEs.
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) {
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) {
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIDescriptor Ty = Args.getElement(i);
- if (Ty.isUnspecifiedParameter()) {
+ DIType Ty = resolve(Args.getElement(i));
+ if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
- addType(Arg, DIType(Ty));
- if (DIType(Ty).isArtificial())
+ addType(Arg, Ty);
+ if (Ty.isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
@@ -1161,14 +1021,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
break;
case dwarf::DW_TAG_subroutine_type: {
// Add return type. A void return won't have a type.
- DIArray Elements = CTy.getTypeArray();
- DIType RTy(Elements.getElement(0));
+ DITypeArray Elements = DISubroutineType(CTy).getTypeArray();
+ DIType RTy(resolve(Elements.getElement(0)));
if (RTy)
addType(Buffer, RTy);
bool isPrototyped = true;
if (Elements.getNumElements() == 2 &&
- Elements.getElement(1).isUnspecifiedParameter())
+ !Elements.getElement(1))
isPrototyped = false;
constructSubprogramArguments(Buffer, Elements);
@@ -1191,7 +1051,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Add elements to structure type.
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.isSubprogram())
@@ -1373,20 +1233,23 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
}
/// getOrCreateSubprogramDIE - Create new DIE using SP.
-DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
- DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+ DIE *ContextDIE =
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext()));
if (DIE *SPDie = getDIE(SP))
return SPDie;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
- // Add subprogram definitions to the CU die directly.
- ContextDIE = &getUnitDie();
- // Build the decl now to ensure it precedes the definition.
- getOrCreateSubprogramDIE(SPDecl);
+ if (!Minimal) {
+ // Add subprogram definitions to the CU die directly.
+ ContextDIE = &getUnitDie();
+ // Build the decl now to ensure it precedes the definition.
+ getOrCreateSubprogramDIE(SPDecl);
+ }
}
// DW_TAG_inlined_subroutine may refer to this DIE.
@@ -1401,14 +1264,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
return &SPDie;
}
-void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
- applySubprogramAttributes(SP, SPDie);
- addGlobalName(SP.getName(), SPDie, Context);
-}
-
-void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
+bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
+ DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
@@ -1431,17 +1288,29 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
- if (DeclDie) {
- // Refer to the function declaration where all the other attributes will be
- // found.
- addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
- return;
- }
+ if (!DeclDie)
+ return false;
+
+ // Refer to the function declaration where all the other attributes will be
+ // found.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+ return true;
+}
+
+void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+ bool Minimal) {
+ if (!Minimal)
+ if (applySubprogramDefinitionAttributes(SP, SPDie))
+ return;
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP.getName());
+ // Skip the rest of the attributes under -gmlt to save space.
+ if (Minimal)
+ return;
+
addSourceLine(SPDie, SP);
// Add the prototype if we have a prototype and we have a C like
@@ -1452,15 +1321,15 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- DICompositeType SPTy = SP.getType();
+ DISubroutineType SPTy = SP.getType();
assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
"the type of a subprogram should be a subroutine");
- DIArray Args = SPTy.getTypeArray();
+ DITypeArray Args = SPTy.getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
- if (Args.getElement(0))
- addType(SPDie, DIType(Args.getElement(0)));
+ if (resolve(Args.getElement(0)))
+ addType(SPDie, DIType(resolve(Args.getElement(0))));
unsigned VK = SP.getVirtuality();
if (VK) {
@@ -1506,7 +1375,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
else if (SP.isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else if (SP.isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
@@ -1514,184 +1383,6 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addFlag(SPDie, dwarf::DW_AT_explicit);
}
-void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
- DIE &VariableDie) {
- StringRef Name = Var.getName();
- if (!Name.empty())
- addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, Var.getVariable());
- addType(VariableDie, Var.getType());
- if (Var.isArtificial())
- addFlag(VariableDie, dwarf::DW_AT_artificial);
-}
-
-// Return const expression if value is a GEP to access merged global
-// constant. e.g.
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
-static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
- if (!CE || CE->getNumOperands() != 3 ||
- CE->getOpcode() != Instruction::GetElementPtr)
- return nullptr;
-
- // First operand points to a global struct.
- Value *Ptr = CE->getOperand(0);
- if (!isa<GlobalValue>(Ptr) ||
- !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
- return nullptr;
-
- // Second operand is zero.
- const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
- if (!CI || !CI->isZero())
- return nullptr;
-
- // Third operand is offset.
- if (!isa<ConstantInt>(CE->getOperand(2)))
- return nullptr;
-
- return CE;
-}
-
-/// createGlobalVariableDIE - create global variable DIE.
-void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
- // Check for pre-existence.
- if (getDIE(GV))
- return;
-
- assert(GV.isGlobalVariable());
-
- DIScope GVContext = GV.getContext();
- DIType GTy = DD->resolve(GV.getType());
-
- // If this is a static data member definition, some attributes belong
- // to the declaration DIE.
- DIE *VariableDIE = nullptr;
- bool IsStaticMember = false;
- DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
- if (SDMDecl.Verify()) {
- assert(SDMDecl.isStaticMember() && "Expected static member decl");
- // We need the declaration DIE that is in the static member's class.
- VariableDIE = getOrCreateStaticMemberDIE(SDMDecl);
- IsStaticMember = true;
- }
-
- // If this is not a static data member definition, create the variable
- // DIE and add the initial set of attributes to it.
- if (!VariableDIE) {
- // Construct the context before querying for the existence of the DIE in
- // case such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(GVContext);
-
- // Add to map.
- VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
-
- // Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
- addType(*VariableDIE, GTy);
-
- // Add scoping info.
- if (!GV.isLocalToUnit())
- addFlag(*VariableDIE, dwarf::DW_AT_external);
-
- // Add line number info.
- addSourceLine(*VariableDIE, GV);
- }
-
- // Add location.
- bool addToAccelTable = false;
- DIE *VariableSpecDIE = nullptr;
- bool isGlobalVariable = GV.getGlobal() != nullptr;
- if (isGlobalVariable) {
- addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
- if (GV.getGlobal()->isThreadLocal()) {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
- } else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
- }
- // 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- }
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
- // Create specification DIE.
- VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie);
- addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE);
- addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc);
- // A static member's declaration is already flagged as such.
- if (!SDMDecl.Verify())
- addFlag(*VariableDIE, dwarf::DW_AT_declaration);
- } else {
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- }
- // Add the linkage name.
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE
- : *VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
- // AT_const_value was added when the static member was created. To avoid
- // emitting AT_const_value multiple times, we only add AT_const_value when
- // it is not a static member.
- if (!IsStaticMember)
- addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- Value *Ptr = CE->getOperand(0);
- MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- }
-
- if (addToAccelTable) {
- DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE;
- DD->addAccelName(GV.getName(), AddrDIE);
-
- // If the linkage name is different than the name, go ahead and output
- // that as well into the name table.
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- DD->addAccelName(GV.getLinkageName(), AddrDIE);
- }
-
- addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
- GV.getContext());
-}
-
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
@@ -1700,9 +1391,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
// The LowerBound value defines the lower bounds which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
- // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
- // Count == 0, then the array has zero elements in which case we do not emit
- // an upper bound.
+ // DW_AT_lower_bound and DW_AT_count attributes.
int64_t LowerBound = SR.getLo();
int64_t DefaultLowerBound = getDefaultLowerBound();
int64_t Count = SR.getCount();
@@ -1710,11 +1399,22 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
- if (Count != -1 && Count != 0)
+ if (Count != -1)
// FIXME: An unbounded array should reference the expression that defines
// the array.
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None,
- LowerBound + Count - 1);
+ addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+}
+
+DIE *DwarfUnit::getIndexTyDie() {
+ if (IndexTyDie)
+ return IndexTyDie;
+ // Construct an integer type to use for indexes.
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
+ addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+ addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_unsigned);
+ return IndexTyDie;
}
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
@@ -1729,18 +1429,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
// FIXME: This type should be passed down from the front end
// as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
- if (!IdxTy) {
- // Construct an integer type to use for indexes.
- IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
- addString(*IdxTy, dwarf::DW_AT_name, "sizetype");
- addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
- addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- dwarf::DW_ATE_unsigned);
- setIndexTyDie(IdxTy);
- }
// Add subranges to array type.
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
@@ -1750,7 +1441,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
// Add enumerators to enumeration type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
@@ -1788,68 +1479,6 @@ void DwarfUnit::constructContainingTypeDIEs() {
}
}
-/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
- bool Abstract) {
- auto D = constructVariableDIEImpl(DV, Abstract);
- DV.setDIE(*D);
- return D;
-}
-
-std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
- bool Abstract) {
- // Define variable debug information entry.
- auto VariableDie = make_unique<DIE>(DV.getTag());
-
- if (Abstract) {
- applyVariableAttributes(DV, *VariableDie);
- return VariableDie;
- }
-
- // Add variable address.
-
- unsigned Offset = DV.getDotDebugLocOffset();
- if (Offset != ~0U) {
- addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
- return VariableDie;
- }
-
- // Check if variable is described by a DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV.getMInsn()) {
- assert(DVInsn->getNumOperands() == 3);
- if (DVInsn->getOperand(0).isReg()) {
- const MachineOperand RegOp = DVInsn->getOperand(0);
- // If the second operand is an immediate, this is an indirect value.
- if (DVInsn->getOperand(1).isImm()) {
- MachineLocation Location(RegOp.getReg(),
- DVInsn->getOperand(1).getImm());
- addVariableAddress(DV, *VariableDie, Location);
- } else if (RegOp.getReg())
- addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
- } else if (DVInsn->getOperand(0).isImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
- else if (DVInsn->getOperand(0).isFPImm())
- addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isCImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
- DV.getType());
-
- return VariableDie;
- }
-
- // .. else use frame index.
- int FI = DV.getFrameIndex();
- if (FI != ~0) {
- unsigned FrameReg = 0;
- const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- addVariableAddress(DV, *VariableDie, Location);
- }
-
- return VariableDie;
-}
-
/// constructMemberDIE - Construct member DIE from DIDerivedType.
void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer);
@@ -1922,7 +1551,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else
+ else if (DT.isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1971,7 +1600,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
else if (DT.isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else if (DT.isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
@@ -1984,6 +1613,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
}
void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
+ // Emit size of content not including length itself
+ Asm->OutStreamer.AddComment("Length of Unit");
+ Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
+
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
@@ -1999,50 +1632,9 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
-void DwarfUnit::addRange(RangeSpan Range) {
- // Only add a range for this unit if we're emitting full debug.
- if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) {
- // If we have no current ranges just add the range and return, otherwise,
- // check the current section and CU against the previous section and CU we
- // emitted into and the subprogram was contained within. If these are the
- // same then extend our current range, otherwise add this as a new range.
- if (CURanges.size() == 0 ||
- this != DD->getPrevCU() ||
- Asm->getCurrentSection() != DD->getPrevSection()) {
- CURanges.push_back(Range);
- return;
- }
-
- assert(&(CURanges.back().getEnd()->getSection()) ==
- &(Range.getEnd()->getSection()) &&
- "We can only append to a range in the same section!");
- CURanges.back().setEnd(Range.getEnd());
- }
-}
-
-void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
- // Define start line table label for each Compile Unit.
- MCSymbol *LineTableStartSym =
- Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
-
- stmtListIndex = UnitDie.getValues().size();
-
- // DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. For split dwarf this is
- // left in the skeleton CU and so not included.
- // The line table entries are not always emitted in assembly, so it
- // is not okay to use line_table_start here.
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym);
- else
- addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
- DwarfLineSectionSym);
-}
-
-void DwarfCompileUnit::applyStmtList(DIE &D) {
- D.addValue(dwarf::DW_AT_stmt_list,
- UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
- UnitDie.getValues()[stmtListIndex]);
+void DwarfUnit::initSection(const MCSection *Section) {
+ assert(!this->Section);
+ this->Section = Section;
}
void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
@@ -2055,16 +1647,8 @@ void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
sizeof(Ty->getOffset()));
}
-void DwarfTypeUnit::initSection(const MCSection *Section) {
- assert(!this->Section);
- this->Section = Section;
- // Since each type unit is contained in its own COMDAT section, the begin
- // label and the section label are the same. Using the begin label emission in
- // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but
- // the only other alternative of lazily constructing start-of-section labels
- // and storing a mapping in DwarfDebug (or AsmPrinter).
- this->SectionSym = this->LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
- this->LabelEnd =
- Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+bool DwarfTypeUnit::isDwoUnit() const {
+ // Since there are no skeleton type units, all type units are dwo type units
+ // when split DWARF is being used.
+ return DD->useSplitDwarf();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index b7b83b2..f40c937 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
#include "DIE.h"
#include "DwarfDebug.h"
@@ -55,7 +55,8 @@ private:
SmallVector<RangeSpan, 2> Ranges;
public:
- RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {}
+ RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), Ranges(std::move(Ranges)) {}
MCSymbol *getSym() const { return RangeSym; }
const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
void addRange(RangeSpan Range) { Ranges.push_back(Range); }
@@ -96,12 +97,6 @@ protected:
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// GlobalNames - A map of globally visible named entities for this unit.
- StringMap<const DIE *> GlobalNames;
-
- /// GlobalTypes - A map of globally visible types for this unit.
- StringMap<const DIE *> GlobalTypes;
-
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -113,13 +108,6 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
- // List of ranges for a given compile unit.
- SmallVector<RangeSpan, 1> CURanges;
-
- // List of range lists for a given compile unit, separate from the ranges for
- // the CU itself.
- SmallVector<RangeSpanList, 1> CURangeLists;
-
// DIEValueAllocator - All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
@@ -129,86 +117,31 @@ protected:
/// The section this unit will be emitted in.
const MCSection *Section;
- /// A label at the start of the non-dwo section related to this unit.
- MCSymbol *SectionSym;
+ DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU);
- /// The start of the unit within its section.
- MCSymbol *LabelBegin;
+ void initSection(const MCSection *Section);
- /// The end of the unit within its section.
- MCSymbol *LabelEnd;
+ /// Add a string attribute data and value.
+ void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- /// Skeleton unit associated with this unit.
- DwarfUnit *Skeleton;
+ void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
+ bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie);
public:
virtual ~DwarfUnit();
- /// Set the skeleton unit associated with this unit.
- void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; }
-
- /// Get the skeleton unit associated with this unit.
- DwarfUnit *getSkeleton() const { return Skeleton; }
-
- /// Pass in the SectionSym even though we could recreate it in every compile
- /// unit (type units will have actually distinct symbols once they're in
- /// comdat sections).
- void initSection(const MCSection *Section, MCSymbol *SectionSym) {
- assert(!this->Section);
- this->Section = Section;
- this->SectionSym = SectionSym;
- this->LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
- this->LabelEnd =
- Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
- }
-
const MCSection *getSection() const {
assert(Section);
return Section;
}
- /// If there's a skeleton then return the section symbol for the skeleton
- /// unit, otherwise return the section symbol for this unit.
- MCSymbol *getLocalSectionSym() const {
- if (Skeleton)
- return Skeleton->getSectionSym();
- return getSectionSym();
- }
-
- MCSymbol *getSectionSym() const {
- assert(Section);
- return SectionSym;
- }
-
- /// If there's a skeleton then return the begin label for the skeleton unit,
- /// otherwise return the local label for this unit.
- MCSymbol *getLocalLabelBegin() const {
- if (Skeleton)
- return Skeleton->getLabelBegin();
- return getLabelBegin();
- }
-
- MCSymbol *getLabelBegin() const {
- assert(Section);
- return LabelBegin;
- }
-
- MCSymbol *getLabelEnd() const {
- assert(Section);
- return LabelEnd;
- }
-
// Accessors.
unsigned getUniqueID() const { return UniqueID; }
uint16_t getLanguage() const { return CUNode.getLanguage(); }
DICompileUnit getCUNode() const { return CUNode; }
DIE &getUnitDie() { return UnitDie; }
- const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
- const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
@@ -216,29 +149,15 @@ public:
/// hasContent - Return true if this compile unit has something to write out.
bool hasContent() const { return !UnitDie.getChildren().empty(); }
- /// addRange - Add an address range to the list of ranges for this unit.
- void addRange(RangeSpan Range);
-
- /// getRanges - Get the list of ranges for this unit.
- const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
- SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; }
-
- /// addRangeList - Add an address range list to the list of range lists.
- void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); }
-
- /// getRangeLists - Get the vector of range lists.
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
- return CURangeLists;
- }
- SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; }
-
/// getParentContextString - Get a string containing the language specific
/// context for a global name.
std::string getParentContextString(DIScope Context) const;
- /// addGlobalName - Add a new global entity to the compile unit.
- ///
- void addGlobalName(StringRef Name, DIE &Die, DIScope Context);
+ /// Add a new global name to the compile unit.
+ virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {}
+
+ /// Add a new global type to the compile unit.
+ virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {}
/// addAccelNamespace - Add a new name to the namespace accelerator table.
void addAccelNamespace(StringRef Name, const DIE &Die);
@@ -275,14 +194,7 @@ public:
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
/// addString - Add a string attribute data and value.
- void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str);
-
- /// addLocalString - Add a string attribute data and value.
- void addLocalString(DIE &Die, dwarf::Attribute Attribute,
- const StringRef Str);
-
- /// addExpr - Add a Dwarf expression attribute data and value.
- void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+ void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
/// addLabel - Add a Dwarf label attribute data and value.
void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
@@ -290,14 +202,6 @@ public:
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
- /// addLocationList - Add a Dwarf loclistptr attribute data and value.
- void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
-
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
-
/// addSectionOffset - Add an offset into a section attribute data and value.
///
void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
@@ -306,10 +210,6 @@ public:
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addOpAddress(DIELoc &Die, const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
- const MCSymbol *Lo);
-
/// addLabelDelta - Add a label delta attribute data and value.
void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
@@ -339,11 +239,6 @@ public:
void addSourceLine(DIE &Die, DINameSpace NS);
void addSourceLine(DIE &Die, DIObjCProperty Ty);
- /// addAddress - Add an address attribute to a die based on the location
- /// provided.
- void addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect = false);
-
/// addConstantValue - Add constant value entry in variable DIE.
void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty);
void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty);
@@ -359,19 +254,12 @@ public:
void addTemplateParams(DIE &Buffer, DIArray TParams);
/// addRegisterOp - Add register operand.
- void addRegisterOp(DIELoc &TheDie, unsigned Reg);
+ void addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
/// addRegisterOffset - Add register offset.
void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
- /// addComplexAddress - Start with the address based on the location provided,
- /// and generate the DWARF information necessary to find the actual variable
- /// (navigating the extra location information encoded in the type) based on
- /// the starting location. Add the DWARF information to the die.
- void addComplexAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location);
-
// FIXME: Should be reformulated in terms of addComplexAddress.
/// addBlockByrefAddress - Start with the address based on the location
/// provided, and generate the DWARF information necessary to find the
@@ -382,11 +270,6 @@ public:
dwarf::Attribute Attribute,
const MachineLocation &Location);
- /// addVariableAddress - Add DW_AT_location attribute for a
- /// DbgVariable based on provided MachineLocation.
- void addVariableAddress(const DbgVariable &DV, DIE &Die,
- MachineLocation Location);
-
/// addType - Add a new type attribute to the specified entity. This takes
/// and attribute parameter because DW_AT_friend attributes are also
/// type references.
@@ -397,11 +280,10 @@ public:
DIE *getOrCreateNameSpace(DINameSpace NS);
/// getOrCreateSubprogramDIE - Create new DIE using SP.
- DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false);
- void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
- void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
- void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+ void applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+ bool Minimal = false);
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
@@ -417,12 +299,8 @@ public:
/// vtables.
void constructContainingTypeDIEs();
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
- std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
- bool Abstract = false);
-
/// constructSubprogramArguments - Construct function argument DIEs.
- void constructSubprogramArguments(DIE &Buffer, DIArray Args);
+ void constructSubprogramArguments(DIE &Buffer, DITypeArray Args);
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
@@ -453,12 +331,13 @@ protected:
/// none currently exists, create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
-private:
- /// \brief Construct a DIE for the given DbgVariable without initializing the
- /// DbgVariable's DIE reference.
- std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
- bool Abstract);
+ /// resolve - Look in the DwarfDebug map for the MDNode that
+ /// corresponds to the reference.
+ template <typename T> T resolve(DIRef<T> Ref) const {
+ return DD->resolve(Ref);
+ }
+private:
/// constructTypeDIE - Construct basic type die from DIBasicType.
void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
@@ -503,7 +382,7 @@ private:
}
// getIndexTyDie - Get an anonymous type for index type.
- DIE *getIndexTyDie() { return IndexTyDie; }
+ DIE *getIndexTyDie();
// setIndexTyDie - Set D as anonymous type for index which can be reused
// later.
@@ -513,56 +392,22 @@ private:
/// information entry.
DIEEntry *createDIEEntry(DIE &Entry);
- /// resolve - Look in the DwarfDebug map for the MDNode that
- /// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
- return DD->resolve(Ref);
- }
-
/// If this is a named finished type then include it in the list of types for
/// the accelerator tables.
void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE);
-};
-
-class DwarfCompileUnit : public DwarfUnit {
- /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
- /// the need to search for it in applyStmtList.
- unsigned stmtListIndex;
-
-public:
- DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
-
- void initStmtList(MCSymbol *DwarfLineSectionSym);
-
- /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
- void applyStmtList(DIE &D);
-
- /// createGlobalVariableDIE - create global variable DIE.
- void createGlobalVariableDIE(DIGlobalVariable GV);
-
- /// addLabelAddress - Add a dwarf label attribute data and value using
- /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
- void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
-
- /// addLocalLabelAddress - Add a dwarf label attribute data and value using
- /// DW_FORM_addr only.
- void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
- DwarfCompileUnit &getCU() override { return *this; }
-
- unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+ virtual bool isDwoUnit() const = 0;
};
class DwarfTypeUnit : public DwarfUnit {
-private:
uint64_t TypeSignature;
const DIE *Ty;
DwarfCompileUnit &CU;
MCDwarfDwoLineTable *SplitLineTable;
+ unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
+ bool isDwoUnit() const override;
+
public:
DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -578,11 +423,8 @@ public:
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
- void initSection(const MCSection *Section);
+ using DwarfUnit::initSection;
DwarfCompileUnit &getCU() override { return CU; }
-
-protected:
- unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
};
} // end llvm namespace
#endif
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 73f62bf..2bbffb3 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -237,7 +237,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -259,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
};
// Try to merge with the previous call-site. SJLJ doesn't do this
- if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (PreviousIsInvoke && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry &Prev = CallSites.back();
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
@@ -269,7 +269,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
// Otherwise, create a new call-site.
- if (Asm->MAI->isExceptionHandlingDwarf())
+ if (Asm->MAI->usesItaniumLSDAForExceptions())
CallSites.push_back(Site);
else {
// SjLj EH must maintain the call sites in the order assigned
@@ -287,7 +287,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
@@ -314,7 +314,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This tables is reverse indexed base 1.
void EHStreamer::emitExceptionTable() {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -520,7 +520,7 @@ void EHStreamer::emitExceptionTable() {
}
} else {
// DWARF Exception handling
- assert(Asm->MAI->isExceptionHandlingDwarf());
+ assert(Asm->MAI->usesItaniumLSDAForExceptions());
// The call-site table is a list of all call sites that may throw an
// exception (including C++ 'throw' statements) in the procedure
@@ -649,7 +649,7 @@ void EHStreamer::emitExceptionTable() {
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
@@ -662,9 +662,9 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ for (std::vector<const GlobalValue *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalVariable *GV = *I;
+ const GlobalValue *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 2b6ba78..7e9549d 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
-#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index bfcbe6b..5bda5a9 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -50,7 +51,8 @@ void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
MCStreamer &OS = AP.OutStreamer;
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
// Put this in a custom .note section.
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 5a9ecd7..6480d048 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -91,7 +92,8 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 81285d5..0f0ad75 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DwarfException.h"
+#include "Win64Exception.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h
new file mode 100644
index 0000000..538e132
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.h
@@ -0,0 +1,52 @@
+//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing windows exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+
+#include "EHStreamer.h"
+
+namespace llvm {
+class MachineFunction;
+
+class Win64Exception : public EHStreamer {
+ /// Per-function flag to indicate if personality info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// Per-function flag to indicate if the LSDA should be emitted.
+ bool shouldEmitLSDA;
+
+ /// Per-function flag to indicate if frame moves info should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+};
+}
+
+#endif
+
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6a5c431..b5e0929 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -116,15 +116,67 @@ WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
Asm = AP;
}
+void WinCodeViewLineTables::endModule() {
+ if (FnDebugInfo.empty())
+ return;
+
+ assert(Asm != nullptr);
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
+
+ // Emit per-function debug information. This code is extracted into a
+ // separate function for readability.
+ for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
+ emitDebugInfoForFunction(VisitedFunctions[I]);
+
+ // This subsection holds a file index to offset in string table table.
+ Asm->OutStreamer.AddComment("File index to string table offset subsection");
+ Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
+ size_t NumFilenames = FileNameRegistry.Infos.size();
+ Asm->EmitInt32(8 * NumFilenames);
+ for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+ StringRef Filename = FileNameRegistry.Filenames[I];
+ // For each unique filename, just write its offset in the string table.
+ Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
+ // The function name offset is not followed by any additional data.
+ Asm->EmitInt32(0);
+ }
+
+ // This subsection holds the string table.
+ Asm->OutStreamer.AddComment("String table");
+ Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
+ Asm->EmitInt32(FileNameRegistry.LastOffset);
+ // The payload starts with a null character.
+ Asm->EmitInt8(0);
+
+ for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+ // Just emit unique filenames one by one, separated by a null character.
+ Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+ Asm->EmitInt8(0);
+ }
+
+ // No more subsections. Fill with zeros to align the end of the section by 4.
+ Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+
+ clear();
+}
+
static void EmitLabelDiff(MCStreamer &Streamer,
- const MCSymbol *From, const MCSymbol *To) {
+ const MCSymbol *From, const MCSymbol *To,
+ unsigned int Size = 4) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
MCContext &Context = Streamer.getContext();
const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context),
*ToRef = MCSymbolRefExpr::Create(To, Variant, Context);
const MCExpr *AddrDelta =
MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
- Streamer.EmitValue(AddrDelta, 4);
+ Streamer.EmitValue(AddrDelta, Size);
}
void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
@@ -138,6 +190,51 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
return;
assert(FI.End && "Don't know where the function ends?");
+ StringRef FuncName = getDISubprogram(GV).getDisplayName(),
+ GVName = GV->getName();
+ // FIXME Clang currently sets DisplayName to "bar" for a C++
+ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
+ // to demangle display names anyways, so let's just put a mangled name into
+ // the symbols subsection until Clang gives us what we need.
+ if (GVName.startswith("\01?"))
+ FuncName = GVName.substr(1);
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
+ Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
+ EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
+ Asm->OutStreamer.EmitLabel(SymbolsBegin);
+ {
+ MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
+ EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
+ Asm->OutStreamer.EmitLabel(ProcSegmentBegin);
+
+ Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
+ // Some bytes of this segment don't seem to be required for basic debugging,
+ // so just fill them with zeroes.
+ Asm->OutStreamer.EmitFill(12, 0);
+ // This is the important bit that tells the debugger where the function
+ // code is located and what's its size:
+ EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
+ Asm->OutStreamer.EmitFill(12, 0);
+ Asm->OutStreamer.EmitCOFFSecRel32(Fn);
+ Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ Asm->EmitInt8(0);
+ // Emit the function display name as a null-terminated string.
+ Asm->OutStreamer.EmitBytes(FuncName);
+ Asm->EmitInt8(0);
+ Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+
+ // We're done with this function.
+ Asm->EmitInt16(0x0002);
+ Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
+ }
+ Asm->OutStreamer.EmitLabel(SymbolsEnd);
+ // Every subsection must be aligned to a 4-byte boundary.
+ Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+
// PCs/Instructions are grouped into segments sharing the same filename.
// Pre-calculate the lengths (in instructions) of these segments and store
// them in a map for convenience. Each index in the map is the sequential
@@ -154,18 +251,19 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
}
FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
- // Emit the control code of the subsection followed by the payload size.
- Asm->OutStreamer.AddComment(
- "Linetable subsection for " + Twine(Fn->getName()));
+ // Emit a line table subsection, requred to do PC-to-file:line lookup.
+ Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
- MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd);
- Asm->OutStreamer.EmitLabel(SubsectionBegin);
+ MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol();
+ EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd);
+ Asm->OutStreamer.EmitLabel(LineTableBegin);
// Identify the function this subsection is for.
Asm->OutStreamer.EmitCOFFSecRel32(Fn);
Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ // Insert padding after a 16-bit section index.
+ Asm->EmitInt16(0);
// Length of the function's code, in bytes.
EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
@@ -209,56 +307,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
if (FileSegmentEnd)
Asm->OutStreamer.EmitLabel(FileSegmentEnd);
- Asm->OutStreamer.EmitLabel(SubsectionEnd);
-}
-
-void WinCodeViewLineTables::endModule() {
- if (FnDebugInfo.empty())
- return;
-
- assert(Asm != nullptr);
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
- Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
-
- // The COFF .debug$S section consists of several subsections, each starting
- // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
- // of the payload followed by the payload itself. The subsections are 4-byte
- // aligned.
-
- for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
- emitDebugInfoForFunction(VisitedFunctions[I]);
-
- // This subsection holds a file index to offset in string table table.
- Asm->OutStreamer.AddComment("File index to string table offset subsection");
- Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
- size_t NumFilenames = FileNameRegistry.Infos.size();
- Asm->EmitInt32(8 * NumFilenames);
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- StringRef Filename = FileNameRegistry.Filenames[I];
- // For each unique filename, just write it's offset in the string table.
- Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
- // The function name offset is not followed by any additional data.
- Asm->EmitInt32(0);
- }
-
- // This subsection holds the string table.
- Asm->OutStreamer.AddComment("String table");
- Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
- Asm->EmitInt32(FileNameRegistry.LastOffset);
- // The payload starts with a null character.
- Asm->EmitInt8(0);
-
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- // Just emit unique filenames one by one, separated by a null character.
- Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
- Asm->EmitInt8(0);
- }
-
- // No more subsections. Fill with zeros to align the end of the section by 4.
- Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
-
- clear();
+ Asm->OutStreamer.EmitLabel(LineTableEnd);
}
void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 0734d97..8492eac 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
-#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
deleted file mode 100644
index 421946d..0000000
--- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass (at IR level) to replace atomic instructions with
-// appropriate (intrinsic-based) ldrex/strex loops.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "arm-atomic-expand"
-
-namespace {
- class AtomicExpandLoadLinked : public FunctionPass {
- const TargetMachine *TM;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
- initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- bool expandAtomicInsts(Function &F);
-
- bool expandAtomicLoad(LoadInst *LI);
- bool expandAtomicStore(StoreInst *LI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- };
-}
-
-char AtomicExpandLoadLinked::ID = 0;
-char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
-INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
- "Expand Atomic calls in terms of load-linked & store-conditional",
- false, false)
-
-FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
- return new AtomicExpandLoadLinked(TM);
-}
-
-bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
- return false;
-
- SmallVector<Instruction *, 1> AtomicInsts;
-
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (BasicBlock &BB : F)
- for (Instruction &Inst : BB) {
- if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
- (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
- (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
- AtomicInsts.push_back(&Inst);
- }
-
- bool MadeChange = false;
- for (Instruction *Inst : AtomicInsts) {
- if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst))
- continue;
-
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- MadeChange |= expandAtomicRMW(AI);
- else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
- MadeChange |= expandAtomicCmpXchg(CI);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- MadeChange |= expandAtomicLoad(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- MadeChange |= expandAtomicStore(SI);
- else
- llvm_unreachable("Unknown atomic instruction");
- }
-
- return MadeChange;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
- // Load instructions don't actually need a leading fence, even in the
- // SequentiallyConsistent case.
- AtomicOrdering MemOpOrder =
- TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic
- : LI->getOrdering();
-
- // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
- // an ldrexd (A3.5.3).
- IRBuilder<> Builder(LI);
- Value *Val = TM->getTargetLowering()->emitLoadLinked(
- Builder, LI->getPointerOperand(), MemOpOrder);
-
- insertTrailingFence(Builder, LI->getOrdering());
-
- LI->replaceAllUsesWith(Val);
- LI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
- // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
- // we need a loop and the entire instruction is essentially an "atomicrmw
- // xchg" that ignores the value loaded.
- IRBuilder<> Builder(SI);
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), SI->getOrdering());
- SI->eraseFromParent();
-
- // Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
-}
-
-bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
- AtomicOrdering Order = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // fence?
- // atomicrmw.start:
- // %loaded = @load.linked(%addr)
- // %new = some_op iN %loaded, %incr
- // %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %atomicrmw.end
- // atomicrmw.end:
- // fence?
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded =
- TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
-
- Value *NewVal;
- switch (AI->getOperation()) {
- case AtomicRMWInst::Xchg:
- NewVal = AI->getValOperand();
- break;
- case AtomicRMWInst::Add:
- NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Sub:
- NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::And:
- NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Nand:
- NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
- "new");
- break;
- case AtomicRMWInst::Or:
- NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Xor:
- NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- default:
- llvm_unreachable("Unknown atomic op");
- }
-
- Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
- Builder, NewVal, Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
- StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
- Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- insertTrailingFence(Builder, Order);
-
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
- AtomicOrdering FailureOrder = CI->getFailureOrdering();
- Value *Addr = CI->getPointerOperand();
- BasicBlock *BB = CI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
- //
- // The full expansion we produce is:
- // [...]
- // fence?
- // cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
- // cmpxchg.trystore:
- // %stored = @store_conditional(%new, %addr)
- // %success = icmp eq i32 %stored, 0
- // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
- // cmpxchg.success:
- // fence?
- // br label %cmpxchg.end
- // cmpxchg.failure:
- // fence?
- // br label %cmpxchg.end
- // cmpxchg.end:
- // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
- // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
- // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
-
- // This grabs the DebugLoc from CI
- IRBuilder<> Builder(CI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded =
- TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *ShouldStore =
- Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
-
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
- // jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
-
- Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
- Builder, CI->getNewValOperand(), Addr, MemOpOrder);
- StoreSuccess = Builder.CreateICmpEQ(
- StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
- Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : LoopBB);
-
- // Make sure later instructions don't get reordered with a fence if necessary.
- Builder.SetInsertPoint(SuccessBB);
- insertTrailingFence(Builder, SuccessOrder);
- Builder.CreateBr(ExitBB);
-
- Builder.SetInsertPoint(FailureBB);
- insertTrailingFence(Builder, FailureOrder);
- Builder.CreateBr(ExitBB);
-
- // Finally, we have control-flow based knowledge of whether the cmpxchg
- // succeeded or not. We expose this to later passes by converting any
- // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
-
- // Setup the builder so we can create any PHIs we need.
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
- Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
- Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
-
- // Look for any users of the cmpxchg that are just comparing the loaded value
- // against the desired one, and replace them with the CFG-derived version.
- SmallVector<ExtractValueInst *, 2> PrunedInsts;
- for (auto User : CI->users()) {
- ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
- if (!EV)
- continue;
-
- assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
- "weird extraction from { iN, i1 }");
-
- if (EV->getIndices()[0] == 0)
- EV->replaceAllUsesWith(Loaded);
- else
- EV->replaceAllUsesWith(Success);
-
- PrunedInsts.push_back(EV);
- }
-
- // We can remove the instructions now we're no longer iterating through them.
- for (auto EV : PrunedInsts)
- EV->eraseFromParent();
-
- if (!CI->use_empty()) {
- // Some use of the full struct return that we don't understand has happened,
- // so we've got to reconstruct it properly.
- Value *Res;
- Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
- Res = Builder.CreateInsertValue(Res, Success, 1);
-
- CI->replaceAllUsesWith(Res);
- }
-
- CI->eraseFromParent();
- return true;
-}
-
-AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getTargetLowering()->getInsertFencesForAtomic())
- return Ord;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- Builder.CreateFence(Release);
-
- // The exclusive operations don't need any barrier if we're adding separate
- // fences.
- return Monotonic;
-}
-
-void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getTargetLowering()->getInsertFencesForAtomic())
- return;
-
- if (Ord == Acquire || Ord == AcquireRelease)
- Builder.CreateFence(Acquire);
- else if (Ord == SequentiallyConsistent)
- Builder.CreateFence(SequentiallyConsistent);
-}
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
new file mode 100644
index 0000000..12f6bd7
--- /dev/null
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -0,0 +1,563 @@
+//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "atomic-expand"
+
+namespace {
+ class AtomicExpand: public FunctionPass {
+ const TargetMachine *TM;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit AtomicExpand(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad);
+ bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ bool expandAtomicStore(StoreInst *SI);
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+ bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
+ };
+}
+
+char AtomicExpand::ID = 0;
+char &llvm::AtomicExpandID = AtomicExpand::ID;
+INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
+ "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
+ false, false)
+
+FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
+ return new AtomicExpand(TM);
+}
+
+bool AtomicExpand::runOnFunction(Function &F) {
+ if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ return false;
+ auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->isAtomic())
+ AtomicInsts.push_back(&*I);
+ }
+
+ bool MadeChange = false;
+ for (auto I : AtomicInsts) {
+ auto LI = dyn_cast<LoadInst>(I);
+ auto SI = dyn_cast<StoreInst>(I);
+ auto RMWI = dyn_cast<AtomicRMWInst>(I);
+ auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
+ assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
+ "Unknown atomic instruction");
+
+ auto FenceOrdering = Monotonic;
+ bool IsStore, IsLoad;
+ if (TargetLowering->getInsertFencesForAtomic()) {
+ if (LI && isAtLeastAcquire(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(Monotonic);
+ IsStore = false;
+ IsLoad = true;
+ } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(Monotonic);
+ IsStore = true;
+ IsLoad = false;
+ } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+ isAtLeastAcquire(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getSuccessOrdering();
+ CASI->setSuccessOrdering(Monotonic);
+ CASI->setFailureOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ }
+
+ if (FenceOrdering != Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ }
+ }
+
+ if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
+ MadeChange |= expandAtomicLoad(LI);
+ } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ MadeChange |= expandAtomicStore(SI);
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+ MadeChange |= (isIdempotentRMW(RMWI) &&
+ simplifyIdempotentRMW(RMWI)) ||
+ (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
+ expandAtomicRMW(RMWI));
+ } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+ MadeChange |= expandAtomicCmpXchg(CASI);
+ }
+ }
+ return MadeChange;
+}
+
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad) {
+ IRBuilder<> Builder(I);
+
+ auto LeadingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+ Builder, Order, IsStore, IsLoad);
+
+ auto TrailingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+ Builder, Order, IsStore, IsLoad);
+ // The trailing fence is emitted before the instruction instead of after
+ // because there is no easy way of setting Builder insertion point after
+ // an instruction. So we must erase it from the BB, and insert it back
+ // in the right place.
+ // We have a guard here because not every atomic operation generates a
+ // trailing fence.
+ if (TrailingFence) {
+ TrailingFence->removeFromParent();
+ TrailingFence->insertAfter(I);
+ }
+
+ return (LeadingFence || TrailingFence);
+}
+
+bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
+ if (TM->getSubtargetImpl()
+ ->getTargetLowering()
+ ->hasLoadLinkedStoreConditional())
+ return expandAtomicLoadToLL(LI);
+ else
+ return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ IRBuilder<> Builder(LI);
+
+ // On some architectures, load-linked instructions are atomic for larger
+ // sizes than normal loads. For example, the only 64-bit load guaranteed
+ // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
+ Value *Val =
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
+ // This function is only called on atomic stores that are too large to be
+ // atomic if implemented as a native store. So we replace them by an
+ // atomic swap, that can be implemented for example as a ldrex/strex on ARM
+ // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
+ // It is the responsibility of the target to only return true in
+ // shouldExpandAtomicRMW in cases where this is required and possible.
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ return expandAtomicRMW(AI);
+}
+
+bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
+ if (TM->getSubtargetImpl()
+ ->getTargetLowering()
+ ->hasLoadLinkedStoreConditional())
+ return expandAtomicRMWToLLSC(AI);
+ else
+ return expandAtomicRMWToCmpXchg(AI);
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ AtomicOrdering MemOpOrder = AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp i32 ne %stored, 0
+ // br i1 %try_again, label %loop, label %atomicrmw.end
+ // atomicrmw.end:
+ // fence?
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
+ AtomicOrdering MemOpOrder =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ // Atomics require at least natural alignment.
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
+ AtomicOrdering FailureOrder = CI->getFailureOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+ // If getInsertFencesForAtomic() returns true, then the target does not want
+ // to deal with memory orders, and emitLeading/TrailingFence should take care
+ // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
+ // should preserve the ordering.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The full expansion we produce is:
+ // [...]
+ // fence?
+ // cmpxchg.start:
+ // %loaded = @load.linked(%addr)
+ // %should_store = icmp eq %loaded, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
+ // cmpxchg.trystore:
+ // %stored = @store_conditional(%new, %addr)
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.failure:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.end:
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+ // This grabs the DebugLoc from CI
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore =
+ Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+
+ // If the the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ StoreSuccess = Builder.CreateICmpEQ(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : LoopBB);
+
+ // Make sure later instructions don't get reordered with a fence if necessary.
+ Builder.SetInsertPoint(SuccessBB);
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ Builder.SetInsertPoint(FailureBB);
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+
+ // Setup the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now we're no longer iterating through them.
+ for (auto EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
+ return true;
+}
+
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
+ expandAtomicLoad(ResultingLoad);
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index b2737bf..b9b1fd8 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -42,7 +42,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
/// Estimate the cost overhead of SK_Alternate shuffle.
unsigned getAltShuffleOverhead(Type *Ty) const;
- const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
+ const TargetLoweringBase *getTLI() const {
+ return TM->getSubtargetImpl()->getTargetLowering();
+ }
public:
BasicTTI() : ImmutablePass(ID), TM(nullptr) {
@@ -90,7 +92,7 @@ public:
unsigned getJumpBufSize() const override;
bool shouldBuildLookupTables() const override;
bool haveFastSqrt(Type *Ty) const override;
- void getUnrollingPreferences(Loop *L,
+ void getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const override;
/// @}
@@ -99,10 +101,11 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
+ unsigned getMaxInterleaveFactor() const override;
unsigned getRegisterBitWidth(bool Vector) const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override;
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -186,9 +189,8 @@ unsigned BasicTTI::getJumpBufSize() const {
bool BasicTTI::shouldBuildLookupTables() const {
const TargetLoweringBase *TLI = getTLI();
- return TLI->supportJumpTables() &&
- (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+ return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
bool BasicTTI::haveFastSqrt(Type *Ty) const {
@@ -197,7 +199,7 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const {
return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
-void BasicTTI::getUnrollingPreferences(Loop *L,
+void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
@@ -223,11 +225,11 @@ void BasicTTI::getUnrollingPreferences(Loop *L,
// until someone finds a case where it matters in practice.
unsigned MaxOps;
- const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F);
if (PartialUnrollingThreshold.getNumOccurrences() > 0)
MaxOps = PartialUnrollingThreshold;
- else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
- MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
+ else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
+ MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
else
return;
@@ -282,13 +284,14 @@ unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
return 32;
}
-unsigned BasicTTI::getMaximumUnrollFactor() const {
+unsigned BasicTTI::getMaxInterleaveFactor() const {
return 1;
}
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind,
- OperandValueKind) const {
+ OperandValueKind, OperandValueKind,
+ OperandValueProperties,
+ OperandValueProperties) const {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -465,7 +468,8 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
- if (!TLI->isOperationExpand(ISD, LT.second)) {
+ if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
+ !TLI->isOperationExpand(ISD, LT.second)) {
// The operation is legal. Assume it costs 1. Multiply
// by the type-legalization overhead.
return LT.first * 1;
@@ -561,6 +565,8 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::log10: ISD = ISD::FLOG10; break;
case Intrinsic::log2: ISD = ISD::FLOG2; break;
case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::minnum: ISD = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break;
case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
case Intrinsic::floor: ISD = ISD::FFLOOR; break;
case Intrinsic::ceil: ISD = ISD::FCEIL; break;
@@ -572,6 +578,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break;
+ // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return 0;
@@ -582,7 +589,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that thre is some
+ // If the type is split to multiple registers, assume that there is some
// overhead to this.
// TODO: Once we have extract/insert subvector cost we need to use them.
if (LT.first > 1)
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7503e57..2128da1 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -20,6 +20,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -32,8 +34,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -70,6 +72,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -91,22 +95,24 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
- BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true);
- return Folder.OptimizeFunction(MF,
- MF.getTarget().getInstrInfo(),
- MF.getTarget().getRegisterInfo(),
+ BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
+ getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineBranchProbabilityInfo>());
+ return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+ MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
-
-BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ const MachineBlockFrequencyInfo &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo)
+ : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
+ MBPI(ProbInfo) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
-
- EnableHoistCommonCode = CommonHoist;
}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
@@ -388,10 +394,8 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
RS->enterBasicBlock(CurMBB);
if (!CurMBB->empty())
RS->forward(std::prev(CurMBB->end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
+ for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++)
+ if (RS->isRegUsed(i, false))
NewMBB->addLiveIn(i);
}
}
@@ -435,6 +439,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+ // NewMBB inherits CurMBB's block frequency.
+ MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
+
// For targets that use the register scavenger, we must maintain LiveIns.
MaintainLiveIns(&CurMBB, NewMBB);
@@ -504,6 +511,21 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
#endif
}
+BlockFrequency
+BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -806,6 +828,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // Recompute commont tail MBB's edge weights and block frequency.
+ setCommonTailEdgeWeights(*MBB);
+
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
@@ -890,7 +916,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// Visit each predecessor only once.
- if (!UniquePreds.insert(PBB))
+ if (!UniquePreds.insert(PBB).second)
continue;
// Skip blocks which may jump to a landing pad. Can't tail merge these.
@@ -968,6 +994,44 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
return MadeChange;
}
+void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
+ SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
+ BlockFrequency AccumulatedMBBFreq;
+
+ // Aggregate edge frequency of successor edge j:
+ // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
+ // where bb is a basic block that is in SameTails.
+ for (const auto &Src : SameTails) {
+ const MachineBasicBlock *SrcMBB = Src.getBlock();
+ BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
+ AccumulatedMBBFreq += BlockFreq;
+
+ // It is not necessary to recompute edge weights if TailBB has less than two
+ // successors.
+ if (TailMBB.succ_size() <= 1)
+ continue;
+
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
+ }
+
+ MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
+
+ if (TailMBB.succ_size() <= 1)
+ return;
+
+ auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end());
+ uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1;
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale);
+}
+
//===----------------------------------------------------------------------===//
// Branch Optimization
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 0d15ed7..3653a2c 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -7,14 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
-#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/BlockFrequency.h"
#include <vector>
namespace llvm {
+ class MachineBlockFrequencyInfo;
+ class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineModuleInfo;
class RegScavenger;
@@ -23,7 +26,9 @@ namespace llvm {
class BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineBranchProbabilityInfo &MBPI);
bool OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
@@ -92,9 +97,26 @@ namespace llvm {
MachineModuleInfo *MMI;
RegScavenger *RS;
+ /// \brief This class keeps track of branch frequencies of newly created
+ /// blocks and tail-merged blocks.
+ class MBFIWrapper {
+ public:
+ MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+ BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+
+ private:
+ const MachineBlockFrequencyInfo &MBFI;
+ DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
+ };
+
+ MBFIWrapper MBBFreqInfo;
+ const MachineBranchProbabilityInfo &MBPI;
+
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+ void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
void MaintainLiveIns(MachineBasicBlock *CurMBB,
MachineBasicBlock *NewMBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 57c24e8..092346b 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -2,7 +2,7 @@ add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
AllocationOrder.cpp
Analysis.cpp
- AtomicExpandLoadLinkedPass.cpp
+ AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
@@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
+ ForwardControlFlowIntegrity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
@@ -27,7 +28,6 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
- JITCodeEmitter.cpp
JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
@@ -48,9 +48,10 @@ add_llvm_library(LLVMCodeGen
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
MachineCSE.cpp
- MachineCodeEmitter.cpp
+ MachineCombiner.cpp
MachineCopyPropagation.cpp
MachineDominators.cpp
+ MachineDominanceFrontier.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
MachineFunctionPass.cpp
@@ -64,6 +65,7 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachinePostDominators.cpp
MachineRegisterInfo.cpp
+ MachineRegionInfo.cpp
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
@@ -96,7 +98,6 @@ add_llvm_library(LLVMCodeGen
SjLjEHPrepare.cpp
SlotIndexes.cpp
SpillPlacement.cpp
- Spiller.cpp
SplitKit.cpp
StackColoring.cpp
StackProtector.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index bc033f9..d08fae0 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -16,8 +16,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "calcspillweights"
@@ -95,11 +95,12 @@ static bool isRematerializable(const LiveInterval &LI,
void
VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
- const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
MachineBasicBlock *mbb = nullptr;
MachineLoop *loop = nullptr;
bool isExiting = false;
float totalWeight = 0;
+ unsigned numInstr = 0; // Number of instructions using li
SmallPtrSet<MachineInstr*, 8> visited;
// Find the best physreg hint and the best virtreg hint.
@@ -116,9 +117,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
MachineInstr *mi = &*(I++);
+ numInstr++;
if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
continue;
- if (!visited.insert(mi))
+ if (!visited.insert(mi).second)
continue;
float weight = 1.0f;
@@ -186,8 +188,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
- li.weight = normalize(totalWeight, li.getSize());
+ li.weight = normalize(totalWeight, li.getSize(), numInstr);
}
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index add861a..56ecde0 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -19,16 +19,15 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
- const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C)
- : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
- TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
- CallOrPrologue(Unknown) {
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
@@ -50,7 +49,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size, Align);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
+ Size = unsigned(RoundUpToAlignment(Size, MinAlign));
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index b3beac3..307dec5 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
- initializeAtomicExpandLoadLinkedPass(Registry);
+ initializeAtomicExpandPass(Registry);
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCombinerPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachinePostDominatorTreePass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index ccac40c..8d20848 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -63,6 +64,7 @@ STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
+STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -80,15 +82,29 @@ static cl::opt<bool> EnableAndCmpSinking(
"enable-andcmp-sinking", cl::Hidden, cl::init(true),
cl::desc("Enable sinkinig and/cmp into branches."));
+static cl::opt<bool> DisableStoreExtract(
+ "disable-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> StressStoreExtract(
+ "stress-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
+struct TypeIsSExt {
+ Type *Ty;
+ bool IsSExt;
+ TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
+};
+typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetMachine *TM;
const TargetLowering *TLI;
+ const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
@@ -118,7 +134,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -128,6 +144,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
}
private:
@@ -144,6 +161,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool OptimizeExtractElementInst(Instruction *Inst);
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
@@ -168,8 +186,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PromotedInsts.clear();
ModifiedDT = false;
- if (TM) TLI = TM->getTargetLowering();
+ if (TM)
+ TLI = TM->getSubtargetImpl()->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -662,10 +682,13 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
if (!ISDOpcode)
continue;
- // If the use is actually a legal node, there will not be an implicit
- // truncate.
- if (TLI.isOperationLegalOrCustom(ISDOpcode,
- EVT::getEVT(TruncUser->getType())))
+ // If the use is actually a legal node, there will not be an
+ // implicit truncate.
+ // FIXME: always querying the result type is just an
+ // approximation; some nodes' legality is determined by the
+ // operand or other means. There's no good way to find out though.
+ if (TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
continue;
// Don't bother for PHI nodes.
@@ -978,7 +1001,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI))
+ if (!VisitedBBs.insert(*PI).second)
continue;
BasicBlock::InstListType &InstList = (*PI)->getInstList();
@@ -1250,46 +1273,75 @@ class TypePromotionTransaction {
/// \brief Build a truncate instruction.
class TruncBuilder : public TypePromotionAction {
+ Value *Val;
public:
/// \brief Build a truncate instruction of \p Opnd producing a \p Ty
/// result.
/// trunc Opnd to Ty.
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
IRBuilder<> Builder(Opnd);
- Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
- DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
+ Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
}
- /// \brief Get the built instruction.
- Instruction *getBuiltInstruction() { return Inst; }
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
/// \brief Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
- Inst->eraseFromParent();
+ DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
}
};
/// \brief Build a sign extension instruction.
class SExtBuilder : public TypePromotionAction {
+ Value *Val;
public:
/// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
/// result.
/// sext Opnd to Ty.
SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
- : TypePromotionAction(Inst) {
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Val = Builder.CreateSExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
+ }
+
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// \brief Remove the built instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// \brief Build a zero extension instruction.
+ class ZExtBuilder : public TypePromotionAction {
+ Value *Val;
+ public:
+ /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// zext Opnd to Ty.
+ ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
IRBuilder<> Builder(InsertPt);
- Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
- DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
+ Val = Builder.CreateZExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
}
- /// \brief Get the built instruction.
- Instruction *getBuiltInstruction() { return Inst; }
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
/// \brief Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
- Inst->eraseFromParent();
+ DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
}
};
@@ -1418,9 +1470,11 @@ public:
/// Same as Value::mutateType.
void mutateType(Instruction *Inst, Type *NewTy);
/// Same as IRBuilder::createTrunc.
- Instruction *createTrunc(Instruction *Opnd, Type *Ty);
+ Value *createTrunc(Instruction *Opnd, Type *Ty);
/// Same as IRBuilder::createSExt.
- Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ /// Same as IRBuilder::createZExt.
+ Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
/// Same as Instruction::moveBefore.
void moveBefore(Instruction *Inst, Instruction *Before);
/// @}
@@ -1452,20 +1506,28 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
-Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
- Type *Ty) {
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+ Type *Ty) {
std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
- Instruction *I = Ptr->getBuiltInstruction();
+ Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
- return I;
+ return Val;
}
-Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
- Value *Opnd, Type *Ty) {
+Value *TypePromotionTransaction::createSExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
- Instruction *I = Ptr->getBuiltInstruction();
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
+ std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
- return I;
+ return Val;
}
void TypePromotionTransaction::moveBefore(Instruction *Inst,
@@ -1658,58 +1720,75 @@ static bool MightBeFoldableInst(Instruction *I) {
/// \brief Hepler class to perform type promotion.
class TypePromotionHelper {
- /// \brief Utility function to check whether or not a sign extension of
- /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
- /// using the operands of \p Inst or promoting \p Inst.
+ /// \brief Utility function to check whether or not a sign or zero extension
+ /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
+ /// either using the operands of \p Inst or promoting \p Inst.
+ /// The type of the extension is defined by \p IsSExt.
/// In other words, check if:
- /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
+ /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
/// #1 Promotion applies:
- /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+ /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
/// #2 Operand reuses:
- /// sext opnd1 to ConsideredSExtType.
+ /// ext opnd1 to ConsideredExtType.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
- const InstrToOrigTy &PromotedInsts);
+ static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts, bool IsSExt);
/// \brief Utility function to determine if \p OpIdx should be promoted when
/// promoting \p Inst.
- static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
+ static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
if (isa<SelectInst>(Inst) && OpIdx == 0)
return false;
return true;
}
- /// \brief Utility function to promote the operand of \p SExt when this
- /// operand is a promotable trunc or sext.
+ /// \brief Utility function to promote the operand of \p Ext when this
+ /// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
- /// created to promote the operand of SExt.
+ /// created to promote the operand of Ext.
/// Should never be called directly.
- /// \return The promoted value which is used instead of SExt.
- static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts);
- /// \brief Utility function to promote the operand of \p SExt when this
+ /// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
- /// created to promote the operand of SExt.
+ /// created to promote the operand of Ext.
/// Should never be called directly.
- /// \return The promoted value which is used instead of SExt.
- static Value *promoteOperandForOther(Instruction *SExt,
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForOther(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ unsigned &CreatedInsts, bool IsSExt);
+
+ /// \see promoteOperandForOther.
+ static Value *signExtendOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
+ }
+
+ /// \see promoteOperandForOther.
+ static Value *zeroExtendOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
+ }
public:
- /// Type for the utility function that promotes the operand of SExt.
- typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
+ /// Type for the utility function that promotes the operand of Ext.
+ typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts);
- /// \brief Given a sign extend instruction \p SExt, return the approriate
- /// action to promote the operand of \p SExt instead of using SExt.
+ /// \brief Given a sign/zero extend instruction \p Ext, return the approriate
+ /// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
/// sign extension.
/// \p InsertedTruncs keeps track of all the truncate instructions inserted by
@@ -1717,36 +1796,42 @@ public:
/// because we do not want to promote these instructions as CodeGenPrepare
/// will reinsert them later. Thus creating an infinite loop: create/remove.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
const TargetLowering &TLI,
const InstrToOrigTy &PromotedInsts);
};
bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
- Type *ConsideredSExtType,
- const InstrToOrigTy &PromotedInsts) {
- // We can always get through sext.
- if (isa<SExtInst>(Inst))
+ Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts,
+ bool IsSExt) {
+ // We can always get through zext.
+ if (isa<ZExtInst>(Inst))
+ return true;
+
+ // sext(sext) is ok too.
+ if (IsSExt && isa<SExtInst>(Inst))
return true;
// We can get through binary operator, if it is legal. In other words, the
// binary operator must have a nuw or nsw flag.
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
- (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+ ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+ (IsSExt && BinOp->hasNoSignedWrap())))
return true;
// Check if we can do the following simplification.
- // sext(trunc(sext)) --> sext
+ // ext(trunc(opnd)) --> ext(opnd)
if (!isa<TruncInst>(Inst))
return false;
Value *OpndVal = Inst->getOperand(0);
- // Check if we can use this operand in the sext.
- // If the type is larger than the result type of the sign extension,
+ // Check if we can use this operand in the extension.
+ // If the type is larger than the result type of the extension,
// we cannot.
if (OpndVal->getType()->getIntegerBitWidth() >
- ConsideredSExtType->getIntegerBitWidth())
+ ConsideredExtType->getIntegerBitWidth())
return false;
// If the operand of the truncate is not an instruction, we will not have
@@ -1757,18 +1842,19 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
return false;
// Check if the source of the type is narrow enough.
- // I.e., check that trunc just drops sign extended bits.
- // #1 get the type of the operand.
+ // I.e., check that trunc just drops extended bits of the same kind of
+ // the extension.
+ // #1 get the type of the operand and check the kind of the extended bits.
const Type *OpndType;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end())
- OpndType = It->second;
- else if (isa<SExtInst>(Opnd))
- OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+ if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
+ OpndType = It->second.Ty;
+ else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
+ OpndType = Opnd->getOperand(0)->getType();
else
return false;
- // #2 check that the truncate just drop sign extended bits.
+ // #2 check that the truncate just drop extended bits.
if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
return true;
@@ -1776,149 +1862,167 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
}
TypePromotionHelper::Action TypePromotionHelper::getAction(
- Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ Instruction *Ext, const SetOfInstrs &InsertedTruncs,
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
- Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
- Type *SExtTy = SExt->getType();
- // If the operand of the sign extension is not an instruction, we cannot
+ assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
+ "Unexpected instruction type");
+ Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
+ Type *ExtTy = Ext->getType();
+ bool IsSExt = isa<SExtInst>(Ext);
+ // If the operand of the extension is not an instruction, we cannot
// get through.
// If it, check we can get through.
- if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+ if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
return nullptr;
// Do not promote if the operand has been added by codegenprepare.
// Otherwise, it means we are undoing an optimization that is likely to be
// redone, thus causing potential infinite loop.
- if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+ if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
return nullptr;
// SExt or Trunc instructions.
// Return the related handler.
- if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
- return promoteOperandForTruncAndSExt;
+ if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
+ isa<ZExtInst>(ExtOpnd))
+ return promoteOperandForTruncAndAnyExt;
// Regular instruction.
// Abort early if we will have to insert non-free instructions.
- if (!SExtOpnd->hasOneUse() &&
- !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+ if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
return nullptr;
- return promoteOperandForOther;
+ return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
}
-Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
- // Replace sext(trunc(opnd)) or sext(sext(opnd))
- // => sext(opnd).
- TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ Value *ExtVal = SExt;
+ if (isa<ZExtInst>(SExtOpnd)) {
+ // Replace s|zext(zext(opnd))
+ // => zext(opnd).
+ Value *ZExt =
+ TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
+ TPT.replaceAllUsesWith(SExt, ZExt);
+ TPT.eraseInstruction(SExt);
+ ExtVal = ZExt;
+ } else {
+ // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
+ // => z|sext(opnd).
+ TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ }
CreatedInsts = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
TPT.eraseInstruction(SExtOpnd);
- // Check if the sext is still needed.
- if (SExt->getType() != SExt->getOperand(0)->getType())
- return SExt;
+ // Check if the extension is still needed.
+ Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+ return ExtVal;
- // At this point we have: sext ty opnd to ty.
- // Reassign the uses of SExt to the opnd and remove SExt.
- Value *NextVal = SExt->getOperand(0);
- TPT.eraseInstruction(SExt, NextVal);
+ // At this point we have: ext ty opnd to ty.
+ // Reassign the uses of ExtInst to the opnd and remove ExtInst.
+ Value *NextVal = ExtInst->getOperand(0);
+ TPT.eraseInstruction(ExtInst, NextVal);
return NextVal;
}
-Value *
-TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- // By construction, the operand of SExt is an instruction. Otherwise we cannot
+Value *TypePromotionHelper::promoteOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
+ // By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
- Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
CreatedInsts = 0;
- if (!SExtOpnd->hasOneUse()) {
- // SExtOpnd will be promoted.
- // All its uses, but SExt, will need to use a truncated value of the
+ if (!ExtOpnd->hasOneUse()) {
+ // ExtOpnd will be promoted.
+ // All its uses, but Ext, will need to use a truncated value of the
// promoted version.
// Create the truncate now.
- Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
- Trunc->removeFromParent();
- // Insert it just after the definition.
- Trunc->insertAfter(SExtOpnd);
+ Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
+ if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
+ ITrunc->removeFromParent();
+ // Insert it just after the definition.
+ ITrunc->insertAfter(ExtOpnd);
+ }
- TPT.replaceAllUsesWith(SExtOpnd, Trunc);
- // Restore the operand of SExt (which has been replace by the previous call
+ TPT.replaceAllUsesWith(ExtOpnd, Trunc);
+ // Restore the operand of Ext (which has been replace by the previous call
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
- TPT.setOperand(SExt, 0, SExtOpnd);
+ TPT.setOperand(Ext, 0, ExtOpnd);
}
// Get through the Instruction:
// 1. Update its type.
- // 2. Replace the uses of SExt by Inst.
- // 3. Sign extend each operand that needs to be sign extended.
+ // 2. Replace the uses of Ext by Inst.
+ // 3. Extend each operand that needs to be extended.
// Remember the original type of the instruction before promotion.
// This is useful to know that the high bits are sign extended bits.
- PromotedInsts.insert(
- std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+ PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
+ ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
// Step #1.
- TPT.mutateType(SExtOpnd, SExt->getType());
+ TPT.mutateType(ExtOpnd, Ext->getType());
// Step #2.
- TPT.replaceAllUsesWith(SExt, SExtOpnd);
+ TPT.replaceAllUsesWith(Ext, ExtOpnd);
// Step #3.
- Instruction *SExtForOpnd = SExt;
+ Instruction *ExtForOpnd = Ext;
- DEBUG(dbgs() << "Propagate SExt to operands\n");
- for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+ DEBUG(dbgs() << "Propagate Ext to operands\n");
+ for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
++OpIdx) {
- DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
- if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
- !shouldSExtOperand(SExtOpnd, OpIdx)) {
+ DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+ if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
+ !shouldExtOperand(ExtOpnd, OpIdx)) {
DEBUG(dbgs() << "No need to propagate\n");
continue;
}
- // Check if we can statically sign extend the operand.
- Value *Opnd = SExtOpnd->getOperand(OpIdx);
+ // Check if we can statically extend the operand.
+ Value *Opnd = ExtOpnd->getOperand(OpIdx);
if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
- DEBUG(dbgs() << "Statically sign extend\n");
- TPT.setOperand(
- SExtOpnd, OpIdx,
- ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+ DEBUG(dbgs() << "Statically extend\n");
+ unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
+ APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
+ : Cst->getValue().zext(BitWidth);
+ TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
continue;
}
// UndefValue are typed, so we have to statically sign extend them.
if (isa<UndefValue>(Opnd)) {
- DEBUG(dbgs() << "Statically sign extend\n");
- TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+ DEBUG(dbgs() << "Statically extend\n");
+ TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
continue;
}
// Otherwise we have to explicity sign extend the operand.
- // Check if SExt was reused to sign extend an operand.
- if (!SExtForOpnd) {
+ // Check if Ext was reused to extend an operand.
+ if (!ExtForOpnd) {
// If yes, create a new one.
- DEBUG(dbgs() << "More operands to sext\n");
- SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+ DEBUG(dbgs() << "More operands to ext\n");
+ ExtForOpnd =
+ cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType()));
++CreatedInsts;
}
- TPT.setOperand(SExtForOpnd, 0, Opnd);
+ TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
- TPT.moveBefore(SExtForOpnd, SExtOpnd);
- TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
+ TPT.moveBefore(ExtForOpnd, ExtOpnd);
+ TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
// If more sext are required, new instructions will have to be created.
- SExtForOpnd = nullptr;
+ ExtForOpnd = nullptr;
}
- if (SExtForOpnd == SExt) {
- DEBUG(dbgs() << "Sign extension is useless now\n");
- TPT.eraseInstruction(SExt);
+ if (ExtForOpnd == Ext) {
+ DEBUG(dbgs() << "Extension is useless now\n");
+ TPT.eraseInstruction(Ext);
}
- return SExtOpnd;
+ return ExtOpnd;
}
/// IsPromotionProfitable - Check whether or not promoting an instruction
@@ -1951,8 +2055,8 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
if (!ISDOpcode)
return true;
// Otherwise, check if the promoted instruction is legal or not.
- return TLI.isOperationLegalOrCustom(ISDOpcode,
- EVT::getEVT(PromotedInst->getType()));
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(PromotedInst->getType()));
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@@ -2036,7 +2140,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Shl: {
// Can only handle X*C and X << C.
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
+ if (!RHS)
+ return false;
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
@@ -2129,28 +2234,32 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return true;
}
- case Instruction::SExt: {
- // Try to move this sext out of the way of the addressing mode.
- Instruction *SExt = cast<Instruction>(AddrInst);
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ Instruction *Ext = dyn_cast<Instruction>(AddrInst);
+ if (!Ext)
+ return false;
+
+ // Try to move this ext out of the way of the addressing mode.
// Ask for a method for doing so.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- SExt, InsertedTruncs, TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
if (!TPH)
return false;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
- Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts);
+ Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
// E.g.,
// op = add opnd, 1
- // idx = sext op
+ // idx = ext op
// addr = gep base, idx
// is now:
- // promotedOpnd = sext opnd <- no match here
+ // promotedOpnd = ext opnd <- no match here
// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
// addr = gep base, op <- match
if (MovedAway)
@@ -2289,10 +2398,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ SmallPtrSetImpl<Instruction*> &ConsideredInsts,
const TargetLowering &TLI) {
// If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
+ if (!ConsideredInsts.insert(I).second)
return false;
// If this is an obviously unfoldable instruction, bail out.
@@ -2506,7 +2615,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
worklist.pop_back();
// Break use-def graph loops.
- if (!Visited.insert(V)) {
+ if (!Visited.insert(V).second) {
Consensus = nullptr;
break;
}
@@ -2696,8 +2805,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (ResultIndex) {
- // We need to add this separately from the scale above to help with
- // SDAG consecutive load/store merging.
+ // We need to add this separately from the scale above to help with
+ // SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
@@ -3105,6 +3214,367 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+namespace {
+/// \brief Helper class to promote a scalar operation to a vector one.
+/// This class is used to move downward extractelement transition.
+/// E.g.,
+/// a = vector_op <2 x i32>
+/// b = extractelement <2 x i32> a, i32 0
+/// c = scalar_op b
+/// store c
+///
+/// =>
+/// a = vector_op <2 x i32>
+/// c = vector_op a (equivalent to scalar_op on the related lane)
+/// * d = extractelement <2 x i32> c, i32 0
+/// * store d
+/// Assuming both extractelement and store can be combine, we get rid of the
+/// transition.
+class VectorPromoteHelper {
+ /// Used to perform some checks on the legality of vector operations.
+ const TargetLowering &TLI;
+
+ /// Used to estimated the cost of the promoted chain.
+ const TargetTransformInfo &TTI;
+
+ /// The transition being moved downwards.
+ Instruction *Transition;
+ /// The sequence of instructions to be promoted.
+ SmallVector<Instruction *, 4> InstsToBePromoted;
+ /// Cost of combining a store and an extract.
+ unsigned StoreExtractCombineCost;
+ /// Instruction that will be combined with the transition.
+ Instruction *CombineInst;
+
+ /// \brief The instruction that represents the current end of the transition.
+ /// Since we are faking the promotion until we reach the end of the chain
+ /// of computation, we need a way to get the current end of the transition.
+ Instruction *getEndOfTransition() const {
+ if (InstsToBePromoted.empty())
+ return Transition;
+ return InstsToBePromoted.back();
+ }
+
+ /// \brief Return the index of the original value in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
+ /// c, is at index 0.
+ unsigned getTransitionOriginalValueIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 0;
+ }
+
+ /// \brief Return the index of the index in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 0" the index
+ /// is at index 1.
+ unsigned getTransitionIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 1;
+ }
+
+ /// \brief Get the type of the transition.
+ /// This is the type of the original value.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
+ /// transition is <2 x i32>.
+ Type *getTransitionType() const {
+ return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
+ }
+
+ /// \brief Promote \p ToBePromoted by moving \p Def downward through.
+ /// I.e., we have the following sequence:
+ /// Def = Transition <ty1> a to <ty2>
+ /// b = ToBePromoted <ty2> Def, ...
+ /// =>
+ /// b = ToBePromoted <ty1> a, ...
+ /// Def = Transition <ty1> ToBePromoted to <ty2>
+ void promoteImpl(Instruction *ToBePromoted);
+
+ /// \brief Check whether or not it is profitable to promote all the
+ /// instructions enqueued to be promoted.
+ bool isProfitableToPromote() {
+ Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
+ unsigned Index = isa<ConstantInt>(ValIdx)
+ ? cast<ConstantInt>(ValIdx)->getZExtValue()
+ : -1;
+ Type *PromotedType = getTransitionType();
+
+ StoreInst *ST = cast<StoreInst>(CombineInst);
+ unsigned AS = ST->getPointerAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // Check if this store is supported.
+ if (!TLI.allowsMisalignedMemoryAccesses(
+ TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
+ // If this is not supported, there is no way we can combine
+ // the extract with the store.
+ return false;
+ }
+
+ // The scalar chain of computation has to pay for the transition
+ // scalar to vector.
+ // The vector chain has to account for the combining cost.
+ uint64_t ScalarCost =
+ TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
+ uint64_t VectorCost = StoreExtractCombineCost;
+ for (const auto &Inst : InstsToBePromoted) {
+ // Compute the cost.
+ // By construction, all instructions being promoted are arithmetic ones.
+ // Moreover, one argument is a constant that can be viewed as a splat
+ // constant.
+ Value *Arg0 = Inst->getOperand(0);
+ bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
+ isa<ConstantFP>(Arg0);
+ TargetTransformInfo::OperandValueKind Arg0OVK =
+ IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Arg1OVK =
+ !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ ScalarCost += TTI.getArithmeticInstrCost(
+ Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+ VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+ Arg0OVK, Arg1OVK);
+ }
+ DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+ << ScalarCost << "\nVector: " << VectorCost << '\n');
+ return ScalarCost > VectorCost;
+ }
+
+ /// \brief Generate a constant vector with \p Val with the same
+ /// number of elements as the transition.
+ /// \p UseSplat defines whether or not \p Val should be replicated
+ /// accross the whole vector.
+ /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
+ /// otherwise we generate a vector with as many undef as possible:
+ /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
+ /// used at the index of the extract.
+ Value *getConstantVector(Constant *Val, bool UseSplat) const {
+ unsigned ExtractIdx = UINT_MAX;
+ if (!UseSplat) {
+ // If we cannot determine where the constant must be, we have to
+ // use a splat constant.
+ Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
+ ExtractIdx = CstVal->getSExtValue();
+ else
+ UseSplat = true;
+ }
+
+ unsigned End = getTransitionType()->getVectorNumElements();
+ if (UseSplat)
+ return ConstantVector::getSplat(End, Val);
+
+ SmallVector<Constant *, 4> ConstVec;
+ UndefValue *UndefVal = UndefValue::get(Val->getType());
+ for (unsigned Idx = 0; Idx != End; ++Idx) {
+ if (Idx == ExtractIdx)
+ ConstVec.push_back(Val);
+ else
+ ConstVec.push_back(UndefVal);
+ }
+ return ConstantVector::get(ConstVec);
+ }
+
+ /// \brief Check if promoting to a vector type an operand at \p OperandIdx
+ /// in \p Use can trigger undefined behavior.
+ static bool canCauseUndefinedBehavior(const Instruction *Use,
+ unsigned OperandIdx) {
+ // This is not safe to introduce undef when the operand is on
+ // the right hand side of a division-like instruction.
+ if (OperandIdx != 1)
+ return false;
+ switch (Use->getOpcode()) {
+ default:
+ return false;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return true;
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return !Use->hasNoNaNs();
+ }
+ llvm_unreachable(nullptr);
+ }
+
+public:
+ VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
+ Instruction *Transition, unsigned CombineCost)
+ : TLI(TLI), TTI(TTI), Transition(Transition),
+ StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+ assert(Transition && "Do not know how to promote null");
+ }
+
+ /// \brief Check if we can promote \p ToBePromoted to \p Type.
+ bool canPromote(const Instruction *ToBePromoted) const {
+ // We could support CastInst too.
+ return isa<BinaryOperator>(ToBePromoted);
+ }
+
+ /// \brief Check if it is profitable to promote \p ToBePromoted
+ /// by moving downward the transition through.
+ bool shouldPromote(const Instruction *ToBePromoted) const {
+ // Promote only if all the operands can be statically expanded.
+ // Indeed, we do not want to introduce any new kind of transitions.
+ for (const Use &U : ToBePromoted->operands()) {
+ const Value *Val = U.get();
+ if (Val == getEndOfTransition()) {
+ // If the use is a division and the transition is on the rhs,
+ // we cannot promote the operation, otherwise we may create a
+ // division by zero.
+ if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
+ return false;
+ continue;
+ }
+ if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
+ !isa<ConstantFP>(Val))
+ return false;
+ }
+ // Check that the resulting operation is legal.
+ int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
+ if (!ISDOpcode)
+ return false;
+ return StressStoreExtract ||
+ TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(getTransitionType(), true));
+ }
+
+ /// \brief Check whether or not \p Use can be combined
+ /// with the transition.
+ /// I.e., is it possible to do Use(Transition) => AnotherUse?
+ bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
+
+ /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+ void enqueueForPromotion(Instruction *ToBePromoted) {
+ InstsToBePromoted.push_back(ToBePromoted);
+ }
+
+ /// \brief Set the instruction that will be combined with the transition.
+ void recordCombineInstruction(Instruction *ToBeCombined) {
+ assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
+ CombineInst = ToBeCombined;
+ }
+
+ /// \brief Promote all the instructions enqueued for promotion if it is
+ /// is profitable.
+ /// \return True if the promotion happened, false otherwise.
+ bool promote() {
+ // Check if there is something to promote.
+ // Right now, if we do not have anything to combine with,
+ // we assume the promotion is not profitable.
+ if (InstsToBePromoted.empty() || !CombineInst)
+ return false;
+
+ // Check cost.
+ if (!StressStoreExtract && !isProfitableToPromote())
+ return false;
+
+ // Promote.
+ for (auto &ToBePromoted : InstsToBePromoted)
+ promoteImpl(ToBePromoted);
+ InstsToBePromoted.clear();
+ return true;
+ }
+};
+} // End of anonymous namespace.
+
+void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
+ // At this point, we know that all the operands of ToBePromoted but Def
+ // can be statically promoted.
+ // For Def, we need to use its parameter in ToBePromoted:
+ // b = ToBePromoted ty1 a
+ // Def = Transition ty1 b to ty2
+ // Move the transition down.
+ // 1. Replace all uses of the promoted operation by the transition.
+ // = ... b => = ... Def.
+ assert(ToBePromoted->getType() == Transition->getType() &&
+ "The type of the result of the transition does not match "
+ "the final type");
+ ToBePromoted->replaceAllUsesWith(Transition);
+ // 2. Update the type of the uses.
+ // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
+ Type *TransitionTy = getTransitionType();
+ ToBePromoted->mutateType(TransitionTy);
+ // 3. Update all the operands of the promoted operation with promoted
+ // operands.
+ // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
+ for (Use &U : ToBePromoted->operands()) {
+ Value *Val = U.get();
+ Value *NewVal = nullptr;
+ if (Val == Transition)
+ NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
+ else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
+ isa<ConstantFP>(Val)) {
+ // Use a splat constant if it is not safe to use undef.
+ NewVal = getConstantVector(
+ cast<Constant>(Val),
+ isa<UndefValue>(Val) ||
+ canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
+ } else
+ assert(0 && "Did you modified shouldPromote and forgot to update this?");
+ ToBePromoted->setOperand(U.getOperandNo(), NewVal);
+ }
+ Transition->removeFromParent();
+ Transition->insertAfter(ToBePromoted);
+ Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
+}
+
+/// Some targets can do store(extractelement) with one instruction.
+/// Try to push the extractelement towards the stores when the target
+/// has this feature and this is profitable.
+bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+ unsigned CombineCost = UINT_MAX;
+ if (DisableStoreExtract || !TLI ||
+ (!StressStoreExtract &&
+ !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
+ Inst->getOperand(1), CombineCost)))
+ return false;
+
+ // At this point we know that Inst is a vector to scalar transition.
+ // Try to move it down the def-use chain, until:
+ // - We can combine the transition with its single use
+ // => we got rid of the transition.
+ // - We escape the current basic block
+ // => we would need to check that we are moving it at a cheaper place and
+ // we do not do that for now.
+ BasicBlock *Parent = Inst->getParent();
+ DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+ VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
+ // If the transition has more than one use, assume this is not going to be
+ // beneficial.
+ while (Inst->hasOneUse()) {
+ Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
+ DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+
+ if (ToBePromoted->getParent() != Parent) {
+ DEBUG(dbgs() << "Instruction to promote is in a different block ("
+ << ToBePromoted->getParent()->getName()
+ << ") than the transition (" << Parent->getName() << ").\n");
+ return false;
+ }
+
+ if (VPH.canCombine(ToBePromoted)) {
+ DEBUG(dbgs() << "Assume " << *Inst << '\n'
+ << "will be combined with: " << *ToBePromoted << '\n');
+ VPH.recordCombineInstruction(ToBePromoted);
+ bool Changed = VPH.promote();
+ NumStoreExtractExposed += Changed;
+ return Changed;
+ }
+
+ DEBUG(dbgs() << "Try promoting.\n");
+ if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
+ return false;
+
+ DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+
+ VPH.enqueueForPromotion(ToBePromoted);
+ Inst = ToBePromoted;
+ }
+ return false;
+}
+
bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
@@ -3199,6 +3669,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
return OptimizeShuffleVectorInst(SVI);
+ if (isa<ExtractElementInst>(I))
+ return OptimizeExtractElementInst(I);
+
return false;
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index d2231ec..3d62d48 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -20,24 +20,20 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
-CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
- AntiDepBreaker(), MF(MFi),
- MRI(MF.getRegInfo()),
- TII(MF.getTarget().getInstrInfo()),
- TRI(MF.getTarget().getRegisterInfo()),
- RegClassInfo(RCI),
- Classes(TRI->getNumRegs(), nullptr),
- KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0),
- KeepRegs(TRI->getNumRegs(), false) {}
+CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -273,19 +269,10 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
- // FIXME: we should use a SubRegIterator that includes self (as above), so
- // we don't have to repeat all this code for the reg itself.
- DefIndices[Reg] = Count;
- KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- KeepRegs.reset(Reg);
- Classes[Reg] = nullptr;
- RegRefs.erase(Reg);
- // Repeat, for all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubregReg = *SubRegs;
+ // For the reg itself and all subregs: update the def to current;
+ // reset the kill state, any restrictions, and references.
+ for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
+ unsigned SubregReg = *SRI;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.reset(SubregReg);
@@ -317,19 +304,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.insert(std::make_pair(Reg, &MO));
- // FIXME: we should use an MCRegAliasIterator that includes self so we don't
- // have to repeat all this code for the reg itself.
-
// It wasn't previously live but now it is, this is a kill.
- if (KillIndices[Reg] == ~0u) {
- KillIndices[Reg] = Count;
- DefIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- }
- // Repeat, for all aliases.
- for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // Repeat for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 45e4ff5..ceef74d 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
-#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
@@ -38,32 +38,32 @@ class TargetRegisterInfo;
const TargetRegisterInfo *TRI;
const RegisterClassInfo &RegClassInfo;
- /// AllocatableSet - The set of allocatable registers.
+ /// The set of allocatable registers.
/// We'll be ignoring anti-dependencies on non-allocatable registers,
/// because they may not be safe to break.
const BitVector AllocatableSet;
- /// Classes - For live regs that are only used in one register class in a
+ /// For live regs that are only used in one register class in a
/// live range, the register class. If the register is not live, the
/// corresponding value is null. If the register is live but used in
/// multiple register classes, the corresponding value is -1 casted to a
/// pointer.
std::vector<const TargetRegisterClass*> Classes;
- /// RegRefs - Map registers to all their references within a live range.
+ /// Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
typedef std::multimap<unsigned, MachineOperand *>::const_iterator
RegRefIter;
- /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceeding
+ /// The index of the most recent complete def (proceeding
/// bottom up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
- /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// A set of registers which are live and cannot be changed to
/// break anti-dependencies.
BitVector KeepRegs;
@@ -71,26 +71,23 @@ class TargetRegisterInfo;
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
~CriticalAntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
- /// path
+ /// Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
- ///
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) override;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- ///
void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) override;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
void FinishBlock() override;
private:
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index bc6e9dc..0a188c0 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -106,16 +106,15 @@ namespace llvm {
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule() override;
};
}
-DefaultVLIWScheduler::DefaultVLIWScheduler(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) :
- ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : ScheduleDAGInstrs(MF, &MLI, IsPostRA) {
CanHandleTerminators = true;
}
@@ -125,12 +124,12 @@ void DefaultVLIWScheduler::schedule() {
}
// VLIWPacketizerList Ctor
-VLIWPacketizerList::VLIWPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
- TII = TM.getInstrInfo();
- ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr);
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : MF(MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
}
// VLIWPacketizerList Dtor
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 2b144d8..48213c1 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -19,7 +19,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "codegen-dce"
@@ -90,8 +91,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
bool AnyChanges = false;
MRI = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -122,19 +123,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (isDead(MI)) {
DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
// It is possible that some DBG_VALUE instructions refer to this
- // instruction. Examine each def operand for such references;
- // if found, mark the DBG_VALUE as undef (but don't delete it).
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- MRI->markUsesInDebugValueAsUndef(Reg);
- }
+ // instruction. They get marked as undef and will be deleted
+ // in the live debug variable analysis.
+ MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
- MI->eraseFromParent();
++NumDeletes;
MIE = MBB->rend();
// MII is now pointing to the next instruction to process,
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index a195586..75b74d9 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -50,6 +51,11 @@ namespace {
bool runOnFunction(Function &Fn) override;
+ bool doFinalization(Module &M) override {
+ RewindFunction = nullptr;
+ return false;
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override { }
const char *getPassName() const override {
@@ -118,7 +124,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
return false;
// Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (!RewindFunction) {
LLVMContext &Ctx = Resumes[0]->getContext();
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index c470632..995606f 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -153,8 +153,8 @@ private:
public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LiveRegUnits.clear();
LiveRegUnits.setUniverse(TRI->getNumRegUnits());
@@ -245,7 +245,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI || DefMI->getParent() != Head)
continue;
- if (InsertAfter.insert(DefMI))
+ if (InsertAfter.insert(DefMI).second)
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
if (DefMI->isTerminator()) {
DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
@@ -580,7 +580,7 @@ namespace {
class EarlyIfConverter : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
@@ -688,7 +688,7 @@ bool EarlyIfConverter::shouldConvertIf() {
FBBTrace.getCriticalPath());
// Set a somewhat arbitrary limit on the critical path extension we accept.
- unsigned CritLimit = SchedModel->MispredictPenalty/2;
+ unsigned CritLimit = SchedModel.MispredictPenalty/2;
// If-conversion only makes sense when there is unexploited ILP. Compute the
// maximum-ILP resource length of the trace after if-conversion. Compare it
@@ -782,8 +782,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
.enableEarlyIfConversion())
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
SchedModel =
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
index e976d7f..85b0893 100644
--- a/lib/CodeGen/ErlangGC.cpp
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -53,7 +54,7 @@ ErlangGC::ErlangGC() {
MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
DebugLoc DL) const {
- const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index cf55b68..3680498 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -29,7 +29,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "execution-fix"
@@ -713,13 +714,13 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = MF->getTarget().getInstrInfo();
- TRI = MF->getTarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
LiveRegs = nullptr;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
- << RC->getName() << " **********\n");
+ << TRI->getRegClassName(RC) << " **********\n");
// If no relevant registers are used in the function, we can skip it
// completely.
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 90b62b5..55e809e 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "expand-isel-pseudos"
@@ -46,7 +46,7 @@ INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Iterate through each instruction in the function, looking for pseudos.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 8969bcc..e7bf143 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -20,8 +20,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "postrapseudos"
@@ -182,8 +183,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Machine Function\n"
<< "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
bool MadeChange = false;
diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
new file mode 100644
index 0000000..5e7e853
--- /dev/null
+++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
@@ -0,0 +1,374 @@
+//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief A pass that instruments code with fast checks for indirect calls and
+/// hooks for a function to check violations.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cfi"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
+#include "llvm/CodeGen/JumpInstrTables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumCFIIndirectCalls,
+ "Number of indirect call sites rewritten by the CFI pass");
+
+char ForwardControlFlowIntegrity::ID = 0;
+INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi",
+ "Control-Flow Integrity", true, true)
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTables);
+INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi",
+ "Control-Flow Integrity", true, true)
+
+ModulePass *llvm::createForwardControlFlowIntegrityPass() {
+ return new ForwardControlFlowIntegrity();
+}
+
+ModulePass *llvm::createForwardControlFlowIntegrityPass(
+ JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
+ StringRef CFIFuncName) {
+ return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing,
+ CFIFuncName);
+}
+
+// Checks to see if a given CallSite is making an indirect call, including
+// cases where the indirect call is made through a bitcast.
+static bool isIndirectCall(CallSite &CS) {
+ if (CS.getCalledFunction())
+ return false;
+
+ // Check the value to see if it is merely a bitcast of a function. In
+ // this case, it will translate to a direct function call in the resulting
+ // assembly, so we won't treat it as an indirect call here.
+ const Value *V = CS.getCalledValue();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
+ }
+
+ // Otherwise, since we know it's a call, it must be an indirect call
+ return true;
+}
+
+static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning";
+
+ForwardControlFlowIntegrity::ForwardControlFlowIntegrity()
+ : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single),
+ CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") {
+ initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
+}
+
+ForwardControlFlowIntegrity::ForwardControlFlowIntegrity(
+ JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
+ std::string CFIFuncName)
+ : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType),
+ CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) {
+ initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
+}
+
+ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {}
+
+void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<JumpInstrTableInfo>();
+ AU.addRequired<JumpInstrTables>();
+}
+
+void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) {
+ // To get the indirect calls, we iterate over all functions and iterate over
+ // the list of basic blocks in each. We extract a total list of indirect calls
+ // before modifying any of them, since our modifications will modify the list
+ // of basic blocks.
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ CallSite CS(&I);
+ if (!(CS && isIndirectCall(CS)))
+ continue;
+
+ Value *CalledValue = CS.getCalledValue();
+
+ // Don't rewrite this instruction if the indirect call is actually just
+ // inline assembly, since our transformation will generate an invalid
+ // module in that case.
+ if (isa<InlineAsm>(CalledValue))
+ continue;
+
+ IndirectCalls.push_back(&I);
+ }
+ }
+ }
+}
+
+void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M,
+ CFITables &CFIT) {
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+ for (Instruction *I : IndirectCalls) {
+ CallSite CS(I);
+ Value *CalledValue = CS.getCalledValue();
+
+ // Get the function type for this call and look it up in the tables.
+ Type *VTy = CalledValue->getType();
+ PointerType *PTy = dyn_cast<PointerType>(VTy);
+ Type *EltTy = PTy->getElementType();
+ FunctionType *FunTy = dyn_cast<FunctionType>(EltTy);
+ FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy);
+ ++NumCFIIndirectCalls;
+ Constant *JumpTableStart = nullptr;
+ Constant *JumpTableMask = nullptr;
+ Constant *JumpTableSize = nullptr;
+
+ // Some call sites have function types that don't correspond to any
+ // address-taken function in the module. This happens when function pointers
+ // are passed in from external code.
+ auto it = CFIT.find(TransformedTy);
+ if (it == CFIT.end()) {
+ // In this case, make sure that the function pointer will change by
+ // setting the mask and the start to be 0 so that the transformed
+ // function is 0.
+ JumpTableStart = ConstantInt::get(Int64Ty, 0);
+ JumpTableMask = ConstantInt::get(Int64Ty, 0);
+ JumpTableSize = ConstantInt::get(Int64Ty, 0);
+ } else {
+ JumpTableStart = it->second.StartValue;
+ JumpTableMask = it->second.MaskValue;
+ JumpTableSize = it->second.Size;
+ }
+
+ rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask,
+ JumpTableSize);
+ }
+
+ return;
+}
+
+bool ForwardControlFlowIntegrity::runOnModule(Module &M) {
+ JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>();
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ // JumpInstrTableInfo stores information about the alignment of each entry.
+ // The alignment returned by JumpInstrTableInfo is alignment in bytes, not
+ // in the exponent.
+ ByteAlignment = JITI->entryByteAlignment();
+ LogByteAlignment = llvm::Log2_64(ByteAlignment);
+
+ // Set up tables for control-flow integrity based on information about the
+ // jump-instruction tables.
+ CFITables CFIT;
+ for (const auto &KV : JITI->getTables()) {
+ uint64_t Size = static_cast<uint64_t>(KV.second.size());
+ uint64_t TableSize = NextPowerOf2(Size);
+
+ int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;
+ Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue);
+ Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size);
+
+ // The base of the table is defined to be the first jumptable function in
+ // the table.
+ Function *First = KV.second.begin()->second;
+ Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy);
+ CFIT[KV.first].StartValue = JumpTableStartValue;
+ CFIT[KV.first].MaskValue = JumpTableMaskValue;
+ CFIT[KV.first].Size = JumpTableSize;
+ }
+
+ if (CFIT.empty())
+ return false;
+
+ getIndirectCalls(M);
+
+ if (!CFIEnforcing) {
+ addWarningFunction(M);
+ }
+
+ // Update the instructions with the check and the indirect jump through our
+ // table.
+ updateIndirectCalls(M, CFIT);
+
+ return true;
+}
+
+void ForwardControlFlowIntegrity::addWarningFunction(Module &M) {
+ PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ // Get the type of the Warning Function: void (i8*, i8*),
+ // where the first argument is the name of the function in which the violation
+ // occurs, and the second is the function pointer that violates CFI.
+ SmallVector<Type *, 2> WarningFunArgs;
+ WarningFunArgs.push_back(CharPtrTy);
+ WarningFunArgs.push_back(CharPtrTy);
+ FunctionType *WarningFunTy =
+ FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false);
+
+ if (!CFIFuncName.empty()) {
+ Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy);
+ if (!FailureFun)
+ report_fatal_error("Could not get or insert the function specified by"
+ " -cfi-func-name");
+ } else {
+ // The default warning function swallows the warning and lets the call
+ // continue, since there's no generic way for it to print out this
+ // information.
+ Function *WarningFun = M.getFunction(cfi_failure_func_name);
+ if (!WarningFun) {
+ WarningFun =
+ Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage,
+ cfi_failure_func_name, &M);
+ }
+
+ BasicBlock *Entry =
+ BasicBlock::Create(M.getContext(), "entry", WarningFun, 0);
+ ReturnInst::Create(M.getContext(), Entry);
+ }
+}
+
+void ForwardControlFlowIntegrity::rewriteFunctionPointer(
+ Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart,
+ Constant *JumpTableMask, Constant *JumpTableSize) {
+ IRBuilder<> TempBuilder(I);
+
+ Type *OrigFunType = FunPtr->getType();
+
+ BasicBlock *CurBB = cast<BasicBlock>(I->getParent());
+ Function *CurF = cast<Function>(CurBB->getParent());
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+
+ Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty);
+ Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty);
+
+ Value *NewFunPtr = nullptr;
+ Value *Check = nullptr;
+ switch (CFIType) {
+ case CFIntegrity::Sub: {
+ // This is the subtract, mask, and add version.
+ // Subtract from the base.
+ Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
+
+ // Mask the difference to force this to be a table offset.
+ Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask);
+
+ // Add it back to the base.
+ Value *Result = TempBuilder.CreateAdd(And, TStartInt);
+
+ // Convert it back into a function pointer that we can call.
+ NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
+ break;
+ }
+ case CFIntegrity::Ror: {
+ // This is the subtract and rotate version.
+ // Rotate right by the alignment value. The optimizer should recognize
+ // this sequence as a rotation.
+
+ // This cast is safe, since unsigned is always a subset of uint64_t.
+ uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment);
+ Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64);
+ Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64);
+
+ // Subtract from the base.
+ Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
+
+ // Create the equivalent of a rotate-right instruction.
+ Value *Shr = TempBuilder.CreateLShr(Sub, RightShift);
+ Value *Shl = TempBuilder.CreateShl(Sub, LeftShift);
+ Value *Or = TempBuilder.CreateOr(Shr, Shl);
+
+ // Perform unsigned comparison to check for inclusion in the table.
+ Check = TempBuilder.CreateICmpULT(Or, JumpTableSize);
+ NewFunPtr = FunPtr;
+ break;
+ }
+ case CFIntegrity::Add: {
+ // This is the mask and add version.
+ // Mask the function pointer to turn it into an offset into the table.
+ Value *And = TempBuilder.CreateAnd(TI, JumpTableMask);
+
+ // Then or this offset to the base and get the pointer value.
+ Value *Result = TempBuilder.CreateAdd(And, TStartInt);
+
+ // Convert it back into a function pointer that we can call.
+ NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
+ break;
+ }
+ }
+
+ if (!CFIEnforcing) {
+ // If a check hasn't been added (in the rotation version), then check to see
+ // if it's the same as the original function. This check determines whether
+ // or not we call the CFI failure function.
+ if (!Check)
+ Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr);
+ BasicBlock *InvalidPtrBlock =
+ BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0);
+ BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I);
+
+ // Remove the unconditional branch that connects the two blocks.
+ TerminatorInst *TermInst = CurBB->getTerminator();
+ TermInst->eraseFromParent();
+
+ // Add a conditional branch that depends on the Check above.
+ BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB);
+
+ // Call the warning function for this pointer, then continue.
+ Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock);
+ insertWarning(M, InvalidPtrBlock, BI, FunPtr);
+ } else {
+ // Modify the instruction to call this value.
+ CallSite CS(I);
+ CS.setCalledFunction(NewFunPtr);
+ }
+}
+
+void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block,
+ Instruction *I, Value *FunPtr) {
+ Function *ParentFun = cast<Function>(Block->getParent());
+
+ // Get the function to call right before the instruction.
+ Function *WarningFun = nullptr;
+ if (CFIFuncName.empty()) {
+ WarningFun = M.getFunction(cfi_failure_func_name);
+ } else {
+ WarningFun = M.getFunction(CFIFuncName);
+ }
+
+ assert(WarningFun && "Could not find the CFI failure function");
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ IRBuilder<> WarningInserter(I);
+ // Create a mergeable GlobalVariable containing the name of the function.
+ Value *ParentNameGV =
+ WarningInserter.CreateGlobalString(ParentFun->getName());
+ Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy);
+ Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy);
+ WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr);
+}
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index c3e4f3e..ed40982 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -73,7 +73,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
std::unique_ptr<GCStrategy> S = I->instantiate();
S->M = M;
S->Name = Name;
- StrategyMap.GetOrCreateValue(Name).setValue(S.get());
+ StrategyMap[Name] = S.get();
StrategyList.push_back(std::move(S));
return StrategyList.back().get();
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 1fdff6b..b346657 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -377,7 +378,7 @@ void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
}
void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetFrameLowering *TFI = TM->getFrameLowering();
+ const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
@@ -403,7 +404,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
TM = &MF.getTarget();
MMI = &getAnalysis<MachineModuleInfo>();
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
// Find the size of the stack frame.
FI->setFrameSize(MF.getFrameInfo()->getStackSize());
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 027ee38..457d7d6 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -68,6 +68,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "global-merge"
@@ -142,7 +143,7 @@ INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
@@ -281,7 +282,7 @@ bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1502d5f..e84d25d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -30,7 +31,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -161,6 +161,7 @@ namespace {
const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
MachineRegisterInfo *MRI;
@@ -177,6 +178,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -269,15 +271,16 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- TLI = MF.getTarget().getTargetLowering();
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
const TargetSubtargetInfo &ST =
MF.getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
if (!TII) return false;
@@ -286,9 +289,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true, false);
- BFChange = BF.OptimizeFunction(MF, TII,
- MF.getTarget().getRegisterInfo(),
+ BranchFolder BF(true, false, *MBFI, *MBPI);
+ BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -420,9 +422,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false, false);
- BF.OptimizeFunction(MF, TII,
- MF.getTarget().getRegisterInfo(),
+ BranchFolder BF(false, false, *MBFI, *MBPI);
+ BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -940,9 +941,8 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
/// to determine if it can be if-converted. If predecessor is already enqueued,
/// dequeue it!
void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI) {
- BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ for (const auto &Predecessor : BB->predecessors()) {
+ BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
if (PBBI.IsDone || PBBI.BB == BB)
continue;
PBBI.IsAnalyzed = false;
@@ -1184,6 +1184,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
uint32_t WeightScale = 0;
+
if (HasEarlyExit) {
// Get weights before modifying CvtBBI->BB and BBI.BB.
CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
@@ -1192,6 +1193,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
}
+
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index f3c8d3d..6a6e15d 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -139,21 +138,16 @@ private:
~InlineSpiller() {}
public:
- InlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm)
- : MF(mf),
- LIS(pass.getAnalysis<LiveIntervals>()),
- LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AliasAnalysis>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()),
- VRM(vrm),
- MFI(*mf.getFrameInfo()),
- MRI(mf.getRegInfo()),
- TII(*mf.getTarget().getInstrInfo()),
- TRI(*mf.getTarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
+ InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
void spill(LiveRangeEdit &) override;
@@ -190,11 +184,16 @@ private:
}
namespace llvm {
+
+Spiller::~Spiller() { }
+void Spiller::anchor() { }
+
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
return new InlineSpiller(pass, mf, vrm);
}
+
}
//===----------------------------------------------------------------------===//
@@ -824,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
WorkList.push_back(std::make_pair(LI, VNI));
do {
std::tie(LI, VNI) = WorkList.pop_back_val();
- if (!UsedValues.insert(VNI))
+ if (!UsedValues.insert(VNI).second)
continue;
if (VNI->isPHIDef()) {
@@ -853,6 +852,15 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
+
+ // Analyze instruction
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+
+ if (!RI.Reads)
+ return false;
+
SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
@@ -883,9 +891,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
@@ -939,10 +944,15 @@ void InlineSpiller::reMaterializeAll() {
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
unsigned Reg = RegsToSpill[i];
LiveInterval &LI = LIS.getInterval(Reg);
- for (MachineRegisterInfo::use_bundle_nodbg_iterator
- RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end();
- RI != E; ) {
- MachineInstr *MI = &*(RI++);
+ for (MachineRegisterInfo::reg_bundle_iterator
+ RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+ RegI != E; ) {
+ MachineInstr *MI = &*(RegI++);
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue())
+ continue;
+
anyRemat |= reMaterializeFor(LI, MI);
}
}
@@ -1218,12 +1228,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Modify DBG_VALUE now that the value is in a spill slot.
bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(StackSlot)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
continue;
}
@@ -1363,7 +1377,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
StackInt = nullptr;
DEBUG(dbgs() << "Inline spilling "
- << MRI.getRegClass(edit.getReg())->getName()
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
<< ':' << edit.getParent()
<< "\nFrom original " << PrintReg(Original) << '\n');
assert(edit.getParent().isSpillable() &&
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 91a1da9..1791afb 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_INTERFERENCECACHE
-#define LLVM_CODEGEN_INTERFERENCECACHE
+#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
#include "llvm/CodeGen/LiveIntervalUnion.h"
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a8b8600..2c95e9e 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -459,9 +459,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(CI->getOperand(0));
break;
+ case Intrinsic::assume:
case Intrinsic::var_annotation:
- break; // Strip out annotate intrinsic
-
+ break; // Strip out these intrinsics
+
case Intrinsic::memcpy: {
Type *IntPtr = DL.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
@@ -527,6 +528,34 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
break;
}
+ case Intrinsic::sin: {
+ ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl");
+ break;
+ }
+ case Intrinsic::cos: {
+ ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl");
+ break;
+ }
+ case Intrinsic::floor: {
+ ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl");
+ break;
+ }
+ case Intrinsic::ceil: {
+ ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill");
+ break;
+ }
+ case Intrinsic::trunc: {
+ ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl");
+ break;
+ }
+ case Intrinsic::round: {
+ ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl");
+ break;
+ }
+ case Intrinsic::copysign: {
+ ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+ break;
+ }
case Intrinsic::flt_rounds:
// Lower to "round to the nearest"
if (!CI->getType()->isVoidTy())
diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp
deleted file mode 100644
index 96a5389..0000000
--- a/lib/CodeGen/JITCodeEmitter.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-
-using namespace llvm;
-
-void JITCodeEmitter::anchor() { }
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
index 61ef722..20f775c 100644
--- a/lib/CodeGen/JumpInstrTables.cpp
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -163,7 +163,7 @@ void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(OrigFunTy);
+ FunctionType *FunTy = transformType(JTType, OrigFunTy);
JumpMap::iterator it = Metadata.find(FunTy);
if (Metadata.end() == it) {
@@ -191,11 +191,12 @@ Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
}
bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(FunTy);
+ FunctionType *TransTy = transformType(JTType, FunTy);
return Metadata.end() != Metadata.find(TransTy);
}
-FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
+ FunctionType *FunTy) {
// Returning nullptr forces all types into the same table, since all types map
// to the same type
Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
@@ -211,7 +212,7 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
FunctionType *VoidFnTy = FunctionType::get(
Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
- switch (JTType) {
+ switch (JTT) {
case JumpTable::Single:
return FunctionType::get(RetTy, EmptyParams, false);
@@ -251,16 +252,12 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
}
bool JumpInstrTables::runOnModule(Module &M) {
- // Make sure the module is well-formed, especially with respect to jumptable.
- if (verifyModule(M))
- return false;
-
JITI = &getAnalysis<JumpInstrTableInfo>();
- // Get the set of jumptable-annotated functions.
+ // Get the set of jumptable-annotated functions that have their address taken.
DenseMap<Function *, Function *> Functions;
for (Function &F : M) {
- if (F.hasFnAttribute(Attribute::JumpTable)) {
+ if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) {
assert(F.hasUnnamedAddr() &&
"Attribute 'jumptable' requires 'unnamed_addr'");
Functions[&F] = nullptr;
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index df96b94..61face2 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,8 +13,10 @@
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -48,8 +50,8 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(),
- TargetTriple);
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
+ *getSubtargetImpl()->getRegisterInfo(), getTargetTriple());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -110,9 +112,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI =
- new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
- &TM->getTargetLowering()->getObjFileLowering());
+ MachineModuleInfo *MMI = new MachineModuleInfo(
+ *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
+ &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -143,8 +145,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
AnalysisID StopAfter) {
// Passes to handle jumptable function annotations. These can't be handled at
// JIT time, so we don't add them directly to addPassesToGenerateCode.
- PM.add(createJumpInstrTableInfoPass());
+ PM.add(createJumpInstrTableInfoPass(
+ getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound()));
PM.add(createJumpInstrTablesPass(Options.JTType));
+ if (Options.FCFI)
+ PM.add(createForwardControlFlowIntegrityPass(
+ Options.JTType, Options.CFIType, Options.CFIEnforcing,
+ Options.getCFIFuncName()));
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
@@ -165,10 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
- const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCRegisterInfo &MRI = *getRegisterInfo();
- const MCInstrInfo &MII = *getInstrInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
+ const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo();
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
@@ -201,9 +208,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (!MCE || !MAB)
return true;
- AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Context, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
+ AsmStreamer.reset(
+ getTarget()
+ .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE,
+ STI, Options.MCOptions.MCRelaxAll));
break;
}
case CGFT_Null:
@@ -226,26 +234,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return false;
}
-/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
-/// get machine code emitted. This uses a JITCodeEmitter object to handle
-/// actually outputting the machine code and resolving things like the address
-/// of functions. This method should return true if machine code emission is
-/// not supported.
-///
-bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
- JITCodeEmitter &JCE,
- bool DisableVerify) {
- // Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr,
- nullptr);
- if (!Context)
- return true;
-
- addCodeEmitter(PM, JCE);
-
- return false; // success!
-}
-
/// addPassesToEmitMC - Add passes to the specified pass manager to get
/// machine code emitted with the MCJIT. This method returns true if machine
/// code is not supported. It fills the MCContext Ctx pointer which can be
@@ -265,19 +253,20 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
- STI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(
+ *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
std::unique_ptr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
+ AsmStreamer.reset(getTarget()
+ .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB,
+ Out, MCE, STI,
+ Options.MCOptions.MCRelaxAll));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index d12c234..b621e3b 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -137,6 +137,8 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ if (DL.isUnknown())
+ return nullptr;
MDNode *Scope = nullptr;
MDNode *InlinedAt = nullptr;
DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
@@ -172,9 +174,12 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
std::make_tuple(Parent, DIDescriptor(Scope),
nullptr, false)).first;
- if (!Parent && DIDescriptor(Scope).isSubprogram() &&
- DISubprogram(Scope).describes(MF->getFunction()))
+ if (!Parent) {
+ assert(DIDescriptor(Scope).isSubprogram());
+ assert(DISubprogram(Scope).describes(MF->getFunction()));
+ assert(!CurrentFnLexicalScope);
CurrentFnLexicalScope = &I->second;
+ }
return &I->second;
}
@@ -285,7 +290,7 @@ void LexicalScopes::assignInstructionRanges(
/// have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void LexicalScopes::getMachineBasicBlocks(
- DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) {
+ DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
MBBs.clear();
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 7d5646b..1624851 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <memory>
@@ -109,7 +110,8 @@ public:
namespace {
class LDVImpl;
class UserValue {
- const MDNode *variable; ///< The debug info variable we are part of.
+ const MDNode *Variable; ///< The debug info variable we are part of.
+ const MDNode *Expression; ///< Any complex address expression.
unsigned offset; ///< Byte offset into variable.
bool IsIndirect; ///< true if this is a register-indirect+offset value.
DebugLoc dl; ///< The debug location for the variable. This is
@@ -139,11 +141,10 @@ class UserValue {
public:
/// UserValue - Create a new UserValue.
- UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L,
- LocMap::Allocator &alloc)
- : variable(var), offset(o), IsIndirect(i), dl(L), leader(this),
- next(nullptr), locInts(alloc)
- {}
+ UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
+ DebugLoc L, LocMap::Allocator &alloc)
+ : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L),
+ leader(this), next(nullptr), locInts(alloc) {}
/// getLeader - Get the leader of this value's equivalence class.
UserValue *getLeader() {
@@ -157,8 +158,10 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, unsigned Offset, bool indirect) const {
- return Var == variable && Offset == offset && indirect == IsIndirect;
+ bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset,
+ bool indirect) const {
+ return Var == Variable && Expr == Expression && Offset == offset &&
+ indirect == IsIndirect;
}
/// merge - Merge equivalence classes.
@@ -267,7 +270,7 @@ public:
LiveIntervals &LIS, const TargetInstrInfo &TRI);
/// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
- /// variable may have more than one corresponding DBG_VALUE instructions.
+ /// variable may have more than one corresponding DBG_VALUE instructions.
/// Only first one needs DebugLoc to identify variable's lexical scope
/// in source file.
DebugLoc findDebugLoc();
@@ -306,8 +309,8 @@ class LDVImpl {
UVMap userVarMap;
/// getUserValue - Find or create a UserValue.
- UserValue *getUserValue(const MDNode *Var, unsigned Offset,
- bool IsIndirect, DebugLoc DL);
+ UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect, DebugLoc DL);
/// lookupVirtReg - Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
@@ -344,6 +347,7 @@ public:
"Dbg values are not emitted in LDV");
EmitDone = false;
ModifiedMF = false;
+ LS.reset();
}
/// mapVirtReg - Map virtual register to an equivalence class.
@@ -360,8 +364,8 @@ public:
} // namespace
void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
- DIVariable DV(variable);
- OS << "!\"";
+ DIVariable DV(Variable);
+ OS << "!\"";
DV.printExtendedName(OS);
OS << "\"\t";
if (offset)
@@ -421,19 +425,20 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
LDV->mapVirtReg(locations[i].getReg(), this);
}
-UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
- bool IsIndirect, DebugLoc DL) {
+UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect,
+ DebugLoc DL) {
UserValue *&Leader = userVarMap[Var];
if (Leader) {
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Offset, IsIndirect))
+ if (UV->match(Var, Expr, Offset, IsIndirect))
return UV;
}
userValues.push_back(
- make_unique<UserValue>(Var, Offset, IsIndirect, DL, allocator));
+ make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
UserValue *UV = userValues.back().get();
Leader = UserValue::merge(Leader, UV);
return UV;
@@ -453,7 +458,7 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
// DBG_VALUE loc, offset, variable
- if (MI->getNumOperands() != 3 ||
+ if (MI->getNumOperands() != 4 ||
!(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
!MI->getOperand(2).isMetadata()) {
DEBUG(dbgs() << "Can't handle " << *MI);
@@ -463,9 +468,11 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
// Get or create the UserValue for (variable,offset).
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getOperand(2).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
//here.
- UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc());
+ UserValue *UV =
+ getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
UV->addDef(Idx, MI->getOperand(0));
return true;
}
@@ -698,7 +705,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
MDT = &pass.getAnalysis<MachineDominatorTree>();
- TRI = mf.getTarget().getRegisterInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -710,11 +717,25 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
return Changed;
}
+static void removeDebugValues(MachineFunction &mf) {
+ for (MachineBasicBlock &MBB : mf) {
+ for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ MBBI = MBB.erase(MBBI);
+ }
+ }
+}
+
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
- if (!FunctionDIs.count(mf.getFunction()))
+ if (!FunctionDIs.count(mf.getFunction())) {
+ removeDebugValues(mf);
return false;
+ }
if (!pImpl)
pImpl = new LDVImpl(this);
return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -936,10 +957,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
if (Loc.isReg())
BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, Loc.getReg(), offset, variable);
+ IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(Loc).addImm(offset).addMetadata(variable);
+ .addOperand(Loc)
+ .addImm(offset)
+ .addMetadata(Variable)
+ .addMetadata(Expression);
}
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
@@ -979,7 +1003,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
userValues[i]->rewriteLocations(*VRM, *TRI);
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 7ec0d17..7e3b361 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -18,8 +18,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
-#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DebugInfo.h"
@@ -72,4 +72,4 @@ private:
} // namespace llvm
-#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#endif
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ce8ce96..ddb0032 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -206,7 +206,7 @@ void LiveRange::RenumberValues() {
valnos.clear();
for (const_iterator I = begin(), E = end(); I != E; ++I) {
VNInfo *VNI = I->valno;
- if (!Seen.insert(VNI))
+ if (!Seen.insert(VNI).second)
continue;
assert(!VNI->isUnused() && "Unused valno used by live segment");
VNI->id = (unsigned)valnos.size();
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 1559560..1742e63 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -34,8 +34,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
#include <limits>
@@ -110,9 +110,8 @@ void LiveIntervals::releaseMemory() {
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
- TM = &fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
AA = &getAnalysis<AliasAnalysis>();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -380,12 +379,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
- if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+ !UsedPHIs.insert(VNI).second)
continue;
// The PHI is live, make sure the predecessors are live-out.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI))
+ if (!LiveOut.insert(*PI).second)
continue;
SlotIndex Stop = getMBBEndIdx(*PI);
// A predecessor is not required to have a live-out value for a PHI.
@@ -402,7 +402,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI))
+ if (!LiveOut.insert(*PI).second)
continue;
SlotIndex Stop = getMBBEndIdx(*PI);
assert(li->getVNInfoBefore(Stop) == VNI &&
@@ -785,7 +785,7 @@ private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
void updateRange(LiveRange &LR, unsigned Reg) {
- if (!Updated.insert(&LR))
+ if (!Updated.insert(&LR).second)
return;
DEBUG({
dbgs() << " ";
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 67ab559..345d6c4 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -19,8 +19,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVERANGECALC_H
-#define LLVM_CODEGEN_LIVERANGECALC_H
+#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index c27d630..a0fb712 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -411,8 +411,11 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
- DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
- << MRI.getRegClass(LI.reg)->getName() << '\n');
+ DEBUG({
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
+ });
VRAI.calculateSpillWeightAndHint(LI);
}
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index de2ce22..a8cae08 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -48,7 +47,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b3161a4..8a6ac25 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <limits>
using namespace llvm;
@@ -49,7 +50,7 @@ void LiveStacks::releaseMemory() {
}
bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
// FIXME: No analysis is being done right now. We are relying on the
// register allocators to provide the information.
return false;
@@ -80,7 +81,7 @@ void LiveStacks::print(raw_ostream &OS, const Module*) const {
int Slot = I->first;
const TargetRegisterClass *RC = getIntervalRegClass(Slot);
if (RC)
- OS << " [" << RC->getName() << "]\n";
+ OS << " [" << TRI->getRegClassName(RC) << "]\n";
else
OS << " [Unknown]\n";
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 758b216..c4bca5f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -497,17 +496,135 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
}
}
+void LiveVariables::runOnInstr(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ assert(!MI->isDebugValue());
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ MachineBasicBlock *MBB = MI->getParent();
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+}
+
+void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, nullptr, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ runOnInstr(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ if (!TRI->isInAllocatableClass(LReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, nullptr, Defs);
+}
+
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
- TRI = MF->getTarget().getRegisterInfo();
-
- unsigned NumRegs = TRI->getNumRegs();
- PhysRegDef = new MachineInstr*[NumRegs];
- PhysRegUse = new MachineInstr*[NumRegs];
- PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
- std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
+ TRI = MF->getSubtarget().getRegisterInfo();
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ PHIVarInfo.resize(MF->getNumBlockIDs());
PHIJoins.clear();
// FIXME: LiveIntervals will be updated to remove its dependence on
@@ -525,124 +642,11 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
-
- // Mark live-in registers as live-in.
- SmallVector<unsigned, 4> Defs;
- for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
- EE = MBB->livein_end(); II != EE; ++II) {
- assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
- "Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, nullptr, Defs);
- }
-
- // Loop over all of the instructions, processing them.
- DistanceMap.clear();
- unsigned Dist = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- DistanceMap.insert(std::make_pair(MI, Dist++));
-
- // Process all of the operands of the instruction...
- unsigned NumOperandsToProcess = MI->getNumOperands();
-
- // Unless it is a PHI node. In this case, ONLY process the DEF, not any
- // of the uses. They will be handled in other basic blocks.
- if (MI->isPHI())
- NumOperandsToProcess = 1;
-
- // Clear kill and dead markers. LV will recompute them.
- SmallVector<unsigned, 4> UseRegs;
- SmallVector<unsigned, 4> DefRegs;
- SmallVector<unsigned, 1> RegMasks;
- for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isRegMask()) {
- RegMasks.push_back(i);
- continue;
- }
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned MOReg = MO.getReg();
- if (MO.isUse()) {
- MO.setIsKill(false);
- if (MO.readsReg())
- UseRegs.push_back(MOReg);
- } else /*MO.isDef()*/ {
- MO.setIsDead(false);
- DefRegs.push_back(MOReg);
- }
- }
-
- // Process all uses.
- for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
- unsigned MOReg = UseRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
- HandleVirtRegUse(MOReg, MBB, MI);
- else if (!MRI->isReserved(MOReg))
- HandlePhysRegUse(MOReg, MI);
- }
-
- // Process all masked registers. (Call clobbers).
- for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
- HandleRegMask(MI->getOperand(RegMasks[i]));
-
- // Process all defs.
- for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
- unsigned MOReg = DefRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
- HandleVirtRegDef(MOReg, MI);
- else if (!MRI->isReserved(MOReg))
- HandlePhysRegDef(MOReg, MI, Defs);
- }
- UpdatePhysRegDefs(MI, Defs);
- }
-
- // Handle any virtual assignments from PHI nodes which might be at the
- // bottom of this basic block. We check all of our successor blocks to see
- // if they have PHI nodes, and if so, we simulate an assignment at the end
- // of the current block.
- if (!PHIVarInfo[MBB->getNumber()].empty()) {
- SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
-
- for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
- E = VarInfoVec.end(); I != E; ++I)
- // Mark it alive only in the block we are representing.
- MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
- MBB);
- }
-
- // MachineCSE may CSE instructions which write to non-allocatable physical
- // registers across MBBs. Remember if any reserved register is liveout.
- SmallSet<unsigned, 4> LiveOuts;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->isLandingPad())
- continue;
- for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
- LE = SuccMBB->livein_end(); LI != LE; ++LI) {
- unsigned LReg = *LI;
- if (!TRI->isInAllocatableClass(LReg))
- // Ignore other live-ins, e.g. those that are live into landing pads.
- LiveOuts.insert(LReg);
- }
- }
-
- // Loop over PhysRegDef / PhysRegUse, killing any registers that are
- // available at the end of the basic block.
- for (unsigned i = 0; i != NumRegs; ++i)
- if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
- HandlePhysRegDef(i, nullptr, Defs);
+ for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
+ runOnBlock(MBB, NumRegs);
- std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
}
// Convert and transfer the dead / killed information we have gathered into
@@ -664,9 +668,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif
- delete[] PhysRegDef;
- delete[] PhysRegUse;
- delete[] PHIVarInfo;
+ PhysRegDef.clear();
+ PhysRegUse.clear();
+ PHIVarInfo.clear();
return false;
}
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 36885e8..5c5712f 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -102,7 +103,7 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned LocalObjectCount = MFI->getObjectIndexEnd();
// If the target doesn't want/need this pass, or if there are no locals
@@ -183,7 +184,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
@@ -272,8 +273,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
bool UsedBaseReg = false;
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 08fef5f..3058b1a 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -54,7 +55,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const TargetMachine &TM = MF->getTarget();
- const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix();
+ const char *Prefix =
+ TM.getSubtargetImpl()->getDataLayout()->getPrivateGlobalPrefix();
CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -290,7 +292,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
@@ -359,7 +361,7 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
bool LiveIn = isLiveIn(PhysReg);
iterator I = SkipPHIsAndLabels(begin()), E = end();
MachineRegisterInfo &MRI = getParent()->getRegInfo();
- const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
// Look for an existing copy.
if (LiveIn)
@@ -390,7 +392,7 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
}
void MachineBasicBlock::updateTerminator() {
- const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
// A block with no successors has no concerns with fall-through edges.
if (this->succ_empty()) return;
@@ -645,7 +647,7 @@ bool MachineBasicBlock::canFallThrough() {
// Analyze the branches, if any, at the end of the block.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
@@ -690,7 +692,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// We may need to update this's terminator, but we can't do that if
// AnalyzeBranch fails. If this uses a jump table, we won't touch it.
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
@@ -795,7 +797,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl);
+ MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond,
+ dl);
if (Indexes) {
for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
@@ -823,7 +826,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addLiveIn(*I);
// Update LiveVariables.
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
@@ -903,31 +906,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>()) {
- // Update dominator information.
- MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
-
- bool IsNewIDom = true;
- for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
- PI != E; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- if (PredBB == NMBB)
- continue;
- if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
- IsNewIDom = false;
- break;
- }
- }
-
- // We know "this" dominates the newly created basic block.
- MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
-
- // If all the other predecessors of "Succ" are dominated by "Succ" itself
- // then the new block is the new immediate dominator of "Succ". Otherwise,
- // the new block doesn't dominate anything.
- if (IsNewIDom)
- MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
- }
+ P->getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->recordSplitCriticalEdge(this, Succ, NMBB);
if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
@@ -1086,7 +1066,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock::succ_iterator SI = succ_begin();
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
- if (!SeenMBBs.insert(MBB) ||
+ if (!SeenMBBs.insert(MBB).second ||
(MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
// This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 74af1e2..08fd200 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -812,7 +813,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BE = L.block_end();
BI != BE; ++BI) {
BlockChain &Chain = *BlockToChain[*BI];
- if (!UpdatedPreds.insert(&Chain))
+ if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
@@ -913,7 +914,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = &*FI;
BlockChain &Chain = *BlockToChain[BB];
- if (!UpdatedPreds.insert(&Chain))
+ if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
@@ -1111,8 +1112,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
- TII = F.getTarget().getInstrInfo();
- TLI = F.getTarget().getTargetLowering();
+ TII = F.getSubtarget().getInstrInfo();
+ TLI = F.getSubtarget().getTargetLowering();
assert(BlockToChain.empty());
buildCFGChains(F);
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index c2ab76e..ae26967 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-cse"
@@ -78,7 +79,8 @@ namespace {
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const;
@@ -112,8 +114,12 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
-bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+/// The source register of a COPY machine instruction can be propagated to all
+/// its users, and this propagation could increase the probability of finding
+/// common subexpressions. If the COPY has only one user, the COPY itself can
+/// be removed.
+bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
bool Changed = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -122,10 +128,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (!MRI->hasOneNonDBGUse(Reg))
- // Only coalesce single use copies. This ensure the copy will be
- // deleted.
- continue;
+ bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI->isCopy())
continue;
@@ -153,10 +156,14 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
DEBUG(dbgs() << "*** to: " << *MI);
+ // Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
- DefMI->eraseFromParent();
- ++NumCoalesces;
+ // Coalesce single use copies.
+ if (OnlyOneUse) {
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ }
Changed = true;
}
@@ -453,13 +460,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
- // Look for trivial copy coalescing opportunities.
- if (PerformTrivialCoalescing(MI, MBB)) {
+ // Using trivial copy propagation to find more CSE opportunities.
+ if (PerformTrivialCopyPropagation(MI, MBB)) {
Changed = true;
// After coalescing MI itself may become a copy.
if (MI->isCopyLike())
continue;
+
+ // Try again to see if CSE is possible.
FoundCSE = VNT.count(MI);
}
}
@@ -663,8 +672,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp
deleted file mode 100644
index 81b4978..0000000
--- a/lib/CodeGen/MachineCodeEmitter.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-
-using namespace llvm;
-
-void MachineCodeEmitter::anchor() { }
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
new file mode 100644
index 0000000..2931258
--- /dev/null
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -0,0 +1,435 @@
+//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The machine combiner pass uses machine trace metrics to ensure the combined
+// instructions does not lengthen the critical path or the resource depth.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "machine-combiner"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumInstCombined, "Number of machineinst combined");
+
+namespace {
+class MachineCombiner : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+
+ TargetSchedModel TSchedModel;
+
+ /// OptSize - True if optimizing for code size.
+ bool OptSize;
+
+public:
+ static char ID;
+ MachineCombiner() : MachineFunctionPass(ID) {
+ initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override { return "Machine InstCombiner"; }
+
+private:
+ bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool combineInstructions(MachineBasicBlock *);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace);
+ unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace);
+ bool
+ preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
+ bool preservesResourceLen(MachineBasicBlock *MBB,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs);
+ void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+};
+}
+
+char MachineCombiner::ID = 0;
+char &llvm::MachineCombinerID = MachineCombiner::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+ "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+ false, false)
+
+void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+ MachineInstr *DefInstr = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+ // PHI's have no depth etc.
+ if (DefInstr && DefInstr->isPHI())
+ DefInstr = nullptr;
+ return DefInstr;
+}
+
+/// getDepth - Computes depth of instructions in vector \InsInstr.
+///
+/// \param InsInstrs is a vector of machine instructions
+/// \param InstrIdxForVirtReg is a dense map of virtual register to index
+/// of defining machine instruction in \p InsInstrs
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Depth of last instruction in \InsInstrs ("NewRoot")
+unsigned
+MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ SmallVector<unsigned, 16> InstrDepth;
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // Foreach instruction in in the new sequence compute the depth based on the
+ // operands. Use the trace information when possible. For new operands which
+ // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
+ for (auto *InstrPtr : InsInstrs) { // for each Use
+ unsigned IDepth = 0;
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = InstrPtr->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isUse())
+ continue;
+ unsigned DepthOp = 0;
+ unsigned LatencyOp = 0;
+ DenseMap<unsigned, unsigned>::iterator II =
+ InstrIdxForVirtReg.find(MO.getReg());
+ if (II != InstrIdxForVirtReg.end()) {
+ // Operand is new virtual register not in trace
+ assert(II->second < InstrDepth.size() && "Bad Index");
+ MachineInstr *DefInstr = InsInstrs[II->second];
+ assert(DefInstr &&
+ "There must be a definition for a new virtual register");
+ DepthOp = InstrDepth[II->second];
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ MachineInstr *DefInstr = getOperandDef(MO);
+ if (DefInstr) {
+ DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ }
+ }
+ IDepth = std::max(IDepth, DepthOp + LatencyOp);
+ }
+ InstrDepth.push_back(IDepth);
+ }
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ return InstrDepth[NewRootIdx];
+}
+
+/// getLatency - Computes instruction latency as max of latency of defined
+/// operands
+///
+/// \param Root is a machine instruction that could be replaced by NewRoot.
+/// It is used to compute a more accurate latency information for NewRoot in
+/// case there is a dependent instruction in the same trace (\p BlockTrace)
+/// \param NewRoot is the instruction for which the latency is computed
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Latency of \p NewRoot
+unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // Check each definition in NewRoot and compute the latency
+ unsigned NewRootLatency = 0;
+
+ for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = NewRoot->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isDef())
+ continue;
+ // Get the first instruction that uses MO
+ MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
+ RI++;
+ MachineInstr *UseMO = RI->getParent();
+ unsigned LatencyOp = 0;
+ if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
+ LatencyOp = TSchedModel.computeOperandLatency(
+ NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+ }
+ NewRootLatency = std::max(NewRootLatency, LatencyOp);
+ }
+ return NewRootLatency;
+}
+
+/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// lengthen the critical path. The DAGCombine code sequence ends in MI
+/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
+/// necessary condition for the new sequence to replace the old sequence is that
+/// is cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// The slack is the number of cycles Root can be delayed before the critical
+/// patch becomes longer.
+bool MachineCombiner::preservesCriticalPathLen(
+ MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ // NewRoot is the last instruction in the \p InsInstrs vector
+ // Get depth and latency of NewRoot
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ MachineInstr *NewRoot = InsInstrs[NewRootIdx];
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
+
+ // Get depth, latency and slack of Root
+ unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+ unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootSlack = BlockTrace.getInstrSlack(Root);
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth
+ << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
+ << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency "
+ << NewRootDepth + NewRootLatency << "\n";
+ dbgs() << " RootDepth + RootLatency + RootSlack "
+ << RootDepth + RootLatency + RootSlack << "\n";);
+
+ /// True when the new sequence does not lenghten the critical path.
+ return ((NewRootDepth + NewRootLatency) <=
+ (RootDepth + RootLatency + RootSlack));
+}
+
+/// helper routine to convert instructions into SC
+void MachineCombiner::instr2instrSC(
+ SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
+ for (auto *InstrPtr : Instrs) {
+ unsigned Opc = InstrPtr->getOpcode();
+ unsigned Idx = TII->get(Opc).getSchedClass();
+ const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
+ InstrsSC.push_back(SC);
+ }
+}
+/// preservesResourceLen - True when the new instructions do not increase
+/// resource length
+bool MachineCombiner::preservesResourceLen(
+ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs) {
+
+ // Compute current resource length
+
+ //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+ SmallVector <const MachineBasicBlock *, 1> MBBarr;
+ MBBarr.push_back(MBB);
+ unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
+
+ // Deal with SC rather than Instructions.
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
+ SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
+
+ instr2instrSC(InsInstrs, InsInstrsSC);
+ instr2instrSC(DelInstrs, DelInstrsSC);
+
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
+
+ // Compute new resource length
+ unsigned ResLenAfterCombine =
+ BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
+
+ DEBUG(dbgs() << "RESOURCE DATA: \n";
+ dbgs() << " resource len before: " << ResLenBeforeCombine
+ << " after: " << ResLenAfterCombine << "\n";);
+
+ return ResLenAfterCombine <= ResLenBeforeCombine;
+}
+
+/// \returns true when new instruction sequence should be generated
+/// independent if it lenghtens critical path or not
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
+ if (OptSize && (NewSize < OldSize))
+ return true;
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
+ return false;
+}
+
+/// combineInstructions - substitute a slow code sequence with a faster one by
+/// evaluating instruction combining pattern.
+/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
+/// combining based on machine trace metrics. Only combine a sequence of
+/// instructions when this neither lengthens the critical path nor increases
+/// resource pressure. When optimizing for codesize always combine when the new
+/// sequence is shorter.
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+
+ auto BlockIter = MBB->begin();
+
+ while (BlockIter != MBB->end()) {
+ auto &MI = *BlockIter++;
+
+ DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
+ SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
+ // The motivating example is:
+ //
+ // MUL Other MUL_op1 MUL_op2 Other
+ // \ / \ | /
+ // ADD/SUB => MADD/MSUB
+ // (=Root) (=NewRoot)
+
+ // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
+ // usually beneficial for code size it unfortunately can hurt performance
+ // when the ADD is on the critical path, but the MUL is not. With the
+ // substitution the MUL becomes part of the critical path (in form of the
+ // MADD) and can lengthen it on architectures where the MADD latency is
+ // longer than the ADD latency.
+ //
+ // For each instruction we check if it can be the root of a combiner
+ // pattern. Then for each pattern the new code sequence in form of MI is
+ // generated and evaluated. When the efficiency criteria (don't lengthen
+ // critical path, don't use more resources) is met the new sequence gets
+ // hooked up into the basic block before the old sequence is removed.
+ //
+ // The algorithm does not try to evaluate all patterns and pick the best.
+ // This is only an artificial restriction though. In practice there is
+ // mostly one pattern and hasPattern() can order patterns based on an
+ // internal cost heuristic.
+
+ if (TII->hasPattern(MI, Pattern)) {
+ for (auto P : Pattern) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!InsInstrs.size())
+ continue;
+ // Substitute when we optimize for codesize and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lenghten the critical path nor increases
+ // resource pressure.
+ if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
+ (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) & MI,
+ (MachineInstr *)InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+ // Eagerly stop after the first pattern fired
+ break;
+ } else {
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ for (auto *InstrPtr : InsInstrs) {
+ MachineFunction *MF = MBB->getParent();
+ MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
+ }
+ }
+ InstrIdxForVirtReg.clear();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STI =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ TSchedModel.init(SchedModel, &STI, TII);
+ MRI = &MF.getRegInfo();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
+
+ OptSize = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ if (!TII->useMachineCombiner()) {
+ DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
+ return false;
+ }
+
+ bool Changed = false;
+
+ // Try to combine instructions.
+ for (auto &MBB : MF)
+ Changed |= combineInstructions(&MBB);
+
+ return Changed;
+}
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 3119a35..cbd6272 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "codegen-cp"
@@ -335,8 +336,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
new file mode 100644
index 0000000..0bee846
--- /dev/null
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -0,0 +1,54 @@
+//===- MachineDominanceFrontier.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/CodeGen/Passes.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+template class DominanceFrontierBase<MachineBasicBlock>;
+template class ForwardDominanceFrontierBase<MachineBasicBlock>;
+}
+
+
+char MachineDominanceFrontier::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+
+MachineDominanceFrontier::MachineDominanceFrontier()
+ : MachineFunctionPass(ID),
+ Base() {
+ initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry());
+}
+
+char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID;
+
+bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineDominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
+
+void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 04c8ecb..df60cf3 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -35,6 +35,8 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ CriticalEdgesToSplit.clear();
+ NewBBs.clear();
DT->recalculate(F);
return false;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 6138aef..8a2b610 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "codegen"
@@ -52,17 +53,19 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
}
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
- unsigned FunctionNum, MachineModuleInfo &mmi,
- GCModuleInfo* gmi)
- : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
- if (TM.getRegisterInfo())
- RegInfo = new (Allocator) MachineRegisterInfo(TM);
+ unsigned FunctionNum, MachineModuleInfo &mmi)
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
+ MMI(mmi) {
+ if (STI->getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(this);
else
RegInfo = nullptr;
MFInfo = nullptr;
- FrameInfo =
- new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack"));
+ FrameInfo = new (Allocator)
+ MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(),
+ STI->getFrameLowering()->isStackRealignable(),
+ !F->hasFnAttribute("no-realign-stack"));
if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::StackAlignment))
@@ -70,13 +73,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
getStackAlignment(AttributeSet::FunctionIndex));
ConstantPool = new (Allocator) MachineConstantPool(TM);
- Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
- TM.getTargetLowering()->getPrefFunctionAlignment());
+ STI->getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
@@ -229,10 +232,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *
MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
uint64_t s, unsigned base_alignment,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges) {
return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
}
MachineMemOperand *
@@ -243,12 +246,12 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size,
- MMO->getBaseAlignment(), nullptr);
+ MMO->getBaseAlignment());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size,
- MMO->getBaseAlignment(), nullptr);
+ MMO->getBaseAlignment());
}
MachineInstr::mmo_iterator
@@ -279,7 +282,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getTBAAInfo());
+ (*I)->getAAInfo());
Result[Index] = JustLoad;
}
++Index;
@@ -311,7 +314,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getTBAAInfo());
+ (*I)->getAAInfo());
Result[Index] = JustStore;
}
++Index;
@@ -350,7 +353,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
// Print Constant Pool
ConstantPool->print(OS);
- const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo();
if (RegInfo && !RegInfo->livein_empty()) {
OS << "Function Live Ins: ";
@@ -459,7 +462,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
- const DataLayout *DL = getTarget().getDataLayout();
+ const DataLayout *DL = getSubtarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
@@ -474,7 +477,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
- const DataLayout *DL = getTarget().getDataLayout();
+ const DataLayout *DL = getSubtarget().getDataLayout();
return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Twine(getFunctionNumber())+"$pb");
}
@@ -483,15 +486,11 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
-const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const {
- return TM.getFrameLowering();
-}
-
/// ensureMaxAlignment - Make sure the function is at least Align bytes
/// aligned.
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
- if (!getFrameLowering()->isStackRealignable() || !RealignOption)
- assert(Align <= getFrameLowering()->getStackAlignment() &&
+ if (!StackRealignable || !RealignOption)
+ assert(Align <= StackAlignment &&
"For targets without stack realignment, Align is out of limit!");
if (MaxAlignment < Align) MaxAlignment = Align;
}
@@ -513,11 +512,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
bool isSS, const AllocaInst *Alloca) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca));
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
+ !isSS));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
ensureMaxAlignment(Alignment);
@@ -530,9 +528,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
///
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
- Alignment = clampStackAlignment(
- !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
- getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
ensureMaxAlignment(Alignment);
@@ -547,10 +544,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
- Alignment = clampStackAlignment(
- !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
- getFrameLowering()->getStackAlignment());
- Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
}
@@ -561,20 +557,18 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable) {
+ bool Immutable, bool isAliased) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
// The alignment of the frame index can be determined from its offset from
// the incoming frame position. If the frame object is at offset 32 and
// the stack is guaranteed to be 16-byte aligned, then we know that the
// object is 16-byte aligned.
- unsigned StackAlign = getFrameLowering()->getStackAlignment();
- unsigned Align = MinAlign(SPOffset, StackAlign);
- Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ unsigned Align = MinAlign(SPOffset, StackAlignment);
+ Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
+ StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
- /*Alloca*/ nullptr));
+ /*Alloca*/ nullptr, isAliased));
return -++NumFixedObjects;
}
@@ -582,15 +576,14 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
/// on the stack. Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset) {
- unsigned StackAlign = getFrameLowering()->getStackAlignment();
- unsigned Align = MinAlign(SPOffset, StackAlign);
- Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ unsigned Align = MinAlign(SPOffset, StackAlignment);
+ Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
+ StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
/*Immutable*/ true,
/*isSS*/ true,
- /*Alloca*/ nullptr));
+ /*Alloca*/ nullptr,
+ /*isAliased*/ false));
return -++NumFixedObjects;
}
@@ -600,7 +593,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
const MachineFunction *MF = MBB->getParent();
assert(MF && "MBB must be part of a MachineFunction");
const TargetMachine &TM = MF->getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
BitVector BV(TRI->getNumRegs());
// Before CSI is calculated, no registers are considered pristine. They can be
@@ -625,8 +618,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
}
unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
unsigned MaxAlign = getMaxAlignment();
int Offset = 0;
@@ -676,7 +669,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
- const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
OS << "Frame Objects:\n";
@@ -820,7 +813,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
void MachineConstantPoolValue::anchor() { }
const DataLayout *MachineConstantPool::getDataLayout() const {
- return TM.getDataLayout();
+ return TM.getSubtargetImpl()->getDataLayout();
}
Type *MachineConstantPoolEntry::getType() const {
@@ -836,6 +829,37 @@ unsigned MachineConstantPoolEntry::getRelocationInfo() const {
return Val.ConstVal->getRelocationInfo();
}
+SectionKind
+MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
+ SectionKind Kind;
+ switch (getRelocationInfo()) {
+ default:
+ llvm_unreachable("Unknown section kind");
+ case 2:
+ Kind = SectionKind::getReadOnlyWithRel();
+ break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (DL->getTypeAllocSize(getType())) {
+ case 4:
+ Kind = SectionKind::getMergeableConst4();
+ break;
+ case 8:
+ Kind = SectionKind::getMergeableConst8();
+ break;
+ case 16:
+ Kind = SectionKind::getMergeableConst16();
+ break;
+ default:
+ Kind = SectionKind::getMergeableConst();
+ break;
+ }
+ }
+ return Kind;
+}
+
MachineConstantPool::~MachineConstantPool() {
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (Constants[i].isMachineConstantPoolEntry())
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 46cd60a..f6f34ba 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -46,8 +46,7 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>(),
- getAnalysisIfAvailable<GCModuleInfo>());
+ getAnalysis<MachineModuleInfo>());
return false;
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5122165..7ad0d94 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -105,23 +106,41 @@ void MachineOperand::setIsDef(bool Val) {
IsDef = Val;
}
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+ if (!isReg() || !isOnRegUseList())
+ return;
+
+ if (MachineInstr *MI = getParent()) {
+ if (MachineBasicBlock *MBB = MI->getParent()) {
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+ }
+ }
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
- // If this operand is currently a register operand, and if this is in a
- // function, deregister the operand from the register's use/def list.
- if (isReg() && isOnRegUseList())
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- MF->getRegInfo().removeRegOperandFromUseList(this);
+
+ removeRegFromUses();
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
}
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_FPImmediate;
+ Contents.CFP = FPImm;
+}
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be an register already,
/// the setReg method should be used.
@@ -265,7 +284,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
+ const TargetRegisterInfo *TRI =
+ TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
switch (getType()) {
case MachineOperand::MO_Register:
@@ -429,11 +449,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
uint64_t s, unsigned int a,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges)
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- TBAAInfo(TBAAInfo), Ranges(Ranges) {
+ AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
"invalid pointer value");
@@ -514,7 +534,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << "(align=" << MMO.getAlignment() << ")";
// Print TBAA info.
- if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) {
OS << "(tbaa=";
if (TBAAInfo->getNumOperands() > 0)
TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
@@ -523,6 +543,34 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << ")";
}
+ // Print AA scope info.
+ if (const MDNode *ScopeInfo = MMO.getAAInfo().Scope) {
+ OS << "(alias.scope=";
+ if (ScopeInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
+ ScopeInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA noalias scope info.
+ if (const MDNode *NoAliasInfo = MMO.getAAInfo().NoAlias) {
+ OS << "(noalias=";
+ if (NoAliasInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
+ NoAliasInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
// Print nontemporal info.
if (MMO.isNonTemporal())
OS << "(nontemporal)";
@@ -865,6 +913,27 @@ void MachineInstr::eraseFromParent() {
getParent()->erase(this);
}
+void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
+ assert(getParent() && "Not embedded in a basic block!");
+ MachineBasicBlock *MBB = getParent();
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Not embedded in a function!");
+
+ MachineInstr *MI = (MachineInstr *)this;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MRI.markUsesInDebugValueAsUndef(Reg);
+ }
+ MI->eraseFromParent();
+}
+
void MachineInstr::eraseFromBundle() {
assert(getParent() && "Not embedded in a basic block!");
getParent()->erase_instr(this);
@@ -1379,7 +1448,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA && AA->pointsToConstantMemory(
AliasAnalysis::Location(V, (*I)->getSize(),
- (*I)->getTBAAInfo())))
+ (*I)->getAAInfo())))
continue;
}
@@ -1489,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " = ";
// Print the opcode name.
- if (TM && TM->getInstrInfo())
- OS << TM->getInstrInfo()->getName(getOpcode());
+ if (TM && TM->getSubtargetImpl()->getInstrInfo())
+ OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode());
else
OS << "UNKNOWN";
@@ -1538,17 +1607,17 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && isCall() &&
+ if (MRI && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- if (MRI.use_empty(Reg)) {
+ if (MRI->use_empty(Reg)) {
bool HasAliasLive = false;
- for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ for (MCRegAliasIterator AI(
+ Reg, TM->getSubtargetImpl()->getRegisterInfo(), true);
AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
- if (!MRI.use_empty(AliasReg)) {
+ if (!MRI->use_empty(AliasReg)) {
HasAliasLive = true;
break;
}
@@ -1573,12 +1642,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (isDebugValue() && MO.isMetadata()) {
// Pretty print DBG_VALUE instructions.
const MDNode *MD = MO.getMetadata();
- if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
- OS << "!\"" << MDS->getString() << '\"';
+ DIDescriptor DI(MD);
+ DIVariable DIV(MD);
+
+ if (DI.isVariable() && !DIV.getName().empty())
+ OS << "!\"" << DIV.getName() << '\"';
else
MO.print(OS, TM);
} else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
- OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName(
+ MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1595,9 +1668,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
unsigned RCID = 0;
if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
- if (TM)
- OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
- else
+ if (TM) {
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl()->getRegisterInfo();
+ OS << ':'
+ << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
OS << ":RC" << RCID;
}
@@ -1646,7 +1722,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+ << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
++j;
@@ -1672,6 +1749,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " ]";
}
}
+ if (isIndirectDebugValue())
+ OS << " indirect";
} else if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 962169e..0690f08 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -16,6 +16,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
@@ -103,12 +104,12 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
assert(FirstMI != LastMI && "Empty bundle?");
MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
- const TargetMachine &TM = MBB.getParent()->getTarget();
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
- TII->get(TargetOpcode::BUNDLE));
+ MachineInstrBuilder MIB =
+ BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
@@ -140,7 +141,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
// Internal def is now killed.
KilledDefSet.insert(Reg);
} else {
- if (ExternUseSet.insert(Reg)) {
+ if (ExternUseSet.insert(Reg).second) {
ExternUses.push_back(Reg);
if (MO.isUndef())
UndefUseSet.insert(Reg);
@@ -157,7 +158,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (LocalDefSet.insert(Reg)) {
+ if (LocalDefSet.insert(Reg).second) {
LocalDefs.push_back(Reg);
if (MO.isDead()) {
DeadDefSet.insert(Reg);
@@ -173,7 +174,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!MO.isDead()) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
- if (LocalDefSet.insert(SubReg))
+ if (LocalDefSet.insert(SubReg).second)
LocalDefs.push_back(SubReg);
}
}
@@ -185,7 +186,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<unsigned, 32> Added;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
- if (Added.insert(Reg)) {
+ if (Added.insert(Reg).second) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 94cdab5..2ab0467 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-licm"
@@ -61,7 +62,6 @@ STATISTIC(NumPostRAHoisted,
namespace {
class MachineLICM : public MachineFunctionPass {
- const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetLoweringBase *TLI;
const TargetRegisterInfo *TRI;
@@ -142,9 +142,6 @@ namespace {
RegPressure.clear();
RegLimit.clear();
BackTrace.clear();
- for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
- CI->second.clear();
CSEMap.clear();
}
@@ -324,13 +321,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
return false;
Changed = FirstInLoop = false;
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- TLI = TM->getTargetLowering();
- TRI = TM->getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ TRI = MF.getSubtarget().getRegisterInfo();
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
- InstrItins = TM->getInstrItineraryData();
+ InstrItins = MF.getSubtarget().getInstrItineraryData();
PreRegAlloc = MRI->isSSA();
@@ -822,7 +818,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg);
+ bool isNew = RegSeen.insert(Reg).second;
unsigned RCId, RCCost;
getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
if (MO.isDef())
@@ -854,7 +850,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg);
+ bool isNew = RegSeen.insert(Reg).second;
if (MO.isDef())
Defs.push_back(Reg);
else if (!isNew && isOperandKill(MO, MRI)) {
@@ -1299,15 +1295,7 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.find(Opcode);
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ CSEMap[Opcode].push_back(MI);
}
}
@@ -1447,11 +1435,8 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Add to the CSE map.
if (CI != CSEMap.end())
CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ else
+ CSEMap[Opcode].push_back(MI);
}
++NumHoisted;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 4976e35..eb3c0bf 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -270,7 +270,6 @@ MachineModuleInfo::~MachineModuleInfo() {
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
- CompactUnwindEncoding = 0;
CurCallSite = 0;
CallsEHReturn = 0;
CallsUnwindInit = 0;
@@ -312,7 +311,6 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
- CompactUnwindEncoding = 0;
VariableDbgInfos.clear();
}
@@ -429,7 +427,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
///
void MachineModuleInfo::
addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalVariable *> TyInfo) {
+ ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
for (unsigned N = TyInfo.size(); N; --N)
LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
@@ -439,7 +437,7 @@ addCatchTypeInfo(MachineBasicBlock *LandingPad,
///
void MachineModuleInfo::
addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalVariable *> TyInfo) {
+ ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
std::vector<unsigned> IdsInFilter(TyInfo.size());
for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
@@ -508,7 +506,7 @@ void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) {
for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
if (TypeInfos[i] == TI) return i + 1;
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
new file mode 100644
index 0000000..5a5035e
--- /dev/null
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -0,0 +1,140 @@
+
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
+
+#define DEBUG_TYPE "region"
+
+using namespace llvm;
+
+STATISTIC(numMachineRegions, "The # of machine regions");
+STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
+
+namespace llvm {
+template class RegionBase<RegionTraits<MachineFunction>>;
+template class RegionNodeBase<RegionTraits<MachineFunction>>;
+template class RegionInfoBase<RegionTraits<MachineFunction>>;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegion implementation
+//
+
+MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
+ MachineRegionInfo* RI,
+ MachineDominatorTree *DT, MachineRegion *Parent) :
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
+
+}
+
+MachineRegion::~MachineRegion() { }
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfo implementation
+//
+
+MachineRegionInfo::MachineRegionInfo() :
+ RegionInfoBase<RegionTraits<MachineFunction>>() {
+
+}
+
+MachineRegionInfo::~MachineRegionInfo() {
+
+}
+
+void MachineRegionInfo::updateStatistics(MachineRegion *R) {
+ ++numMachineRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple())
+ ++numMachineSimpleRegions;
+}
+
+void MachineRegionInfo::recalculate(MachineFunction &F,
+ MachineDominatorTree *DT_,
+ MachinePostDominatorTree *PDT_,
+ MachineDominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
+
+ MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F);
+
+ TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfoPass implementation
+//
+
+MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
+ initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineRegionInfoPass::~MachineRegionInfoPass() {
+
+}
+
+bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
+ releaseMemory();
+
+ auto DT = &getAnalysis<MachineDominatorTree>();
+ auto PDT = &getAnalysis<MachinePostDominatorTree>();
+ auto DF = &getAnalysis<MachineDominanceFrontier>();
+
+ RI.recalculate(F, DT, PDT, DF);
+ return false;
+}
+
+void MachineRegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
+
+void MachineRegionInfoPass::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a regionpass (marked PreservedAll) finish.
+ if (MachineRegionInfo::VerifyRegionInfo)
+ RI.verifyAnalysis();
+}
+
+void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineRegionInfoPass::dump() const {
+ RI.dump();
+}
+#endif
+
+char MachineRegionInfoPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+namespace llvm {
+ FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
+ }
+}
+
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f560259..e9612f3 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,28 +16,22 @@
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
// Pin the vtable to this file.
void MachineRegisterInfo::Delegate::anchor() {}
-MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM)
- : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
+MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
+ : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
// Create the physreg use/def lists.
- PhysRegUseDefLists =
- new MachineOperand*[getTargetRegisterInfo()->getNumRegs()];
- memset(PhysRegUseDefLists, 0,
- sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs());
-}
-
-MachineRegisterInfo::~MachineRegisterInfo() {
- delete [] PhysRegUseDefLists;
+ PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr);
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -67,7 +61,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
bool
MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
@@ -283,18 +277,25 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
+/// If ToReg is a physical register we apply the sub register to obtain the
+/// final/proper physical register.
void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
assert(FromReg != ToReg && "Cannot replace a reg with itself");
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+
// TODO: This could be more efficient by bulk changing the operands.
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
++I;
- O.setReg(ToReg);
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ O.substPhysReg(ToReg, *TRI);
+ } else {
+ O.setReg(ToReg);
+ }
}
}
-
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index d9173a2..71a6eba 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -24,8 +24,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
@@ -39,7 +39,7 @@ static AvailableValsTy &getAvailableVals(void *AV) {
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
: AV(nullptr), InsertedPHIs(NewPHI) {
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 44191f7..261942f 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -40,6 +40,9 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::desc("Force bottom-up list scheduling"));
+cl::opt<bool>
+DumpCriticalPathLength("misched-dcpl", cl::Hidden,
+ cl::desc("Print critical path length to stdout"));
}
#ifndef NDEBUG
@@ -378,7 +381,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
bool IsPostRA = Scheduler.isPostRA();
// Visit all machine basic blocks.
@@ -451,6 +454,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
else dbgs() << "End";
dbgs() << " RegionInstrs: " << NumRegionInstrs
<< " Remaining: " << RemainingInstrs << "\n");
+ if (DumpCriticalPathLength) {
+ errs() << MF->getName();
+ errs() << ":BB# " << MBB->getNumber();
+ errs() << " " << MBB->getName() << " \n";
+ }
// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
@@ -2355,14 +2363,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
if (!Top.HazardRec) {
Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
if (!Bot.HazardRec) {
Bot.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
}
@@ -2370,8 +2379,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
- const TargetMachine &TM = Context->MF->getTarget();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const MachineFunction &MF = *Begin->getParent()->getParent();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Avoid setting up the register pressure tracker for small regions to save
// compile time. As a rough heuristic, only track pressure when the number of
@@ -2391,8 +2400,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
RegionPolicy.OnlyBottomUp = true;
// Allow the subtarget to override default policy.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
+ NumRegionInstrs);
// After subtarget overrides, apply command line options.
if (!EnableRegPressure)
@@ -2460,7 +2469,10 @@ void GenericScheduler::registerRoots() {
if ((*I)->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = (*I)->getDepth();
}
- DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+ DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+ }
if (EnableCyclicPath) {
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
@@ -2482,8 +2494,8 @@ static bool tryPressure(const PressureChange &TryP,
}
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
- if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
- Reason)) {
+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+ Reason)) {
return true;
}
// If the candidates are decreasing pressure, reverse priority.
@@ -2885,10 +2897,10 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty,
// or are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
if (!Top.HazardRec) {
Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
}
@@ -2902,7 +2914,10 @@ void PostGenericScheduler::registerRoots() {
if ((*I)->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = (*I)->getDepth();
}
- DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+ DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+ }
}
/// Apply a set of heursitics to a new candidate for PostRA scheduling.
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 0ae495c..ba25bca 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -17,18 +17,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-sink"
@@ -38,6 +41,12 @@ SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+UseBlockFreqInfo("machine-sink-bfi",
+ cl::desc("Use block frequency info to find successors to sink"),
+ cl::init(true), cl::Hidden);
+
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -46,14 +55,20 @@ namespace {
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI; // Machine register information
- MachineDominatorTree *DT; // Machine dominator tree
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
+ const MachineBlockFrequencyInfo *MBFI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
CEBCandidates;
+ // Remember which edges we are about to split.
+ // This is different from CEBCandidates since those edges
+ // will be split.
+ SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
public:
static char ID; // Pass identification
@@ -68,9 +83,13 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
+ if (UseBlockFreqInfo)
+ AU.addRequired<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@@ -82,10 +101,22 @@ namespace {
bool isWorthBreakingCriticalEdge(MachineInstr *MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
- MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To,
- bool BreakPHIEdge);
+ /// \brief Postpone the splitting of the given critical
+ /// edge (\p From, \p To).
+ ///
+ /// We do not split the edges on the fly. Indeed, this invalidates
+ /// the dominance information and thus triggers a lot of updates
+ /// of that information underneath.
+ /// Instead, we postpone all the splits after each iteration of
+ /// the main loop. That way, the information is at least valid
+ /// for the lifetime of an iteration.
+ ///
+ /// \return True if the edge is marked as toSplit, false otherwise.
+ /// False can be retruned if, for instance, this is not profitable.
+ bool PostponeSplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
@@ -135,6 +166,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
DEBUG(dbgs() << "*** to: " << *MI);
MRI->replaceRegWith(DstReg, SrcReg);
MI->eraseFromParent();
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MRI->clearKillFlags(SrcReg);
+
++NumCoalesces;
return true;
}
@@ -213,12 +249,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** Machine Sinking ********\n");
- const TargetMachine &TM = MF.getTarget();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
+ MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
bool EverMadeChange = false;
@@ -228,10 +265,24 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
CEBCandidates.clear();
+ ToSplit.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
+ // If we have anything we marked as toSplit, split it now.
+ for (auto &Pair : ToSplit) {
+ auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this);
+ if (NewSucc != nullptr) {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << Pair.first->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << Pair.second->getNumber() << '\n');
+ MadeChange = true;
+ ++NumSplit;
+ } else
+ DEBUG(dbgs() << " *** Not legal to break critical edge\n");
+ }
// If this iteration over the code changed anything, keep iterating.
if (!MadeChange) break;
EverMadeChange = true;
@@ -289,7 +340,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
// If the pass has already considered breaking this edge (during this pass
// through the function), then let's go ahead and break it. This means
// sinking multiple "cheap" instructions into the same block.
- if (!CEBCandidates.insert(std::make_pair(From, To)))
+ if (!CEBCandidates.insert(std::make_pair(From, To)).second)
return true;
if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI))
@@ -328,21 +379,21 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
return false;
}
-MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
- MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB,
- bool BreakPHIEdge) {
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
- return nullptr;
+ return false;
// Avoid breaking back edge. From == To means backedge for single BB loop.
if (!SplitEdges || FromBB == ToBB)
- return nullptr;
+ return false;
// Check for backedges of more "complex" loops.
if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
LI->isLoopHeader(ToBB))
- return nullptr;
+ return false;
// It's not always legal to break critical edges and sink the computation
// to the edge.
@@ -389,11 +440,13 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
if (*PI == FromBB)
continue;
if (!DT->dominates(ToBB, *PI))
- return nullptr;
+ return false;
}
}
- return FromBB->SplitCriticalEdge(ToBB, this);
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+
+ return true;
}
static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
@@ -419,23 +472,6 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// isPostDominatedBy - Return true if A is post dominated by B.
-static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
-
- // FIXME - Use real post dominator.
- if (A->succ_size() != 2)
- return false;
- MachineBasicBlock::succ_iterator I = A->succ_begin();
- if (B == *I)
- ++I;
- MachineBasicBlock *OtherSuccBlock = *I;
- if (OtherSuccBlock->succ_size() != 1 ||
- *(OtherSuccBlock->succ_begin()) != B)
- return false;
-
- return true;
-}
-
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -447,8 +483,13 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
return false;
// It is profitable if SuccToSinkTo does not post dominate current block.
- if (!isPostDominatedBy(MBB, SuccToSinkTo))
- return true;
+ if (!PDT->dominates(SuccToSinkTo, MBB))
+ return true;
+
+ // It is profitable to sink an instruction from a deeper loop to a shallower
+ // loop, even if the latter post-dominates the former (PR21115).
+ if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+ return true;
// Check if only use in post dominated block is PHI instruction.
bool NonPHIUse = false;
@@ -539,14 +580,20 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
}
// Otherwise, we should look at all the successors and decide which one
- // we should sink to.
- // We give successors with smaller loop depth higher priority.
- SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
- // Sort Successors according to their loop depth.
+ // we should sink to. If we have reliable block frequency information
+ // (frequency != 0) available, give successors with smaller frequencies
+ // higher priority, otherwise prioritize smaller loop depths.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+ // Sort Successors according to their loop depth or block frequency info.
std::stable_sort(
Succs.begin(), Succs.end(),
- [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
- return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+ bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ return HasBlockFreq ? LHSFreq < RHSFreq
+ : LI->getLoopDepth(L) < LI->getLoopDepth(R);
});
for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
E = Succs.end(); SI != E; ++SI) {
@@ -655,21 +702,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (!TryBreak)
DEBUG(dbgs() << "Sinking along critical edge.\n");
else {
- MachineBasicBlock *NewSucc =
- SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
- if (!NewSucc) {
+ // Mark this edge as to be split.
+ // If the edge can actually be split, the next iteration of the main loop
+ // will sink MI in the newly created block.
+ bool Status =
+ PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
- "break critical edge\n");
- return false;
- } else {
- DEBUG(dbgs() << " *** Splitting critical edge:"
- " BB#" << ParentBlock->getNumber()
- << " -- BB#" << NewSucc->getNumber()
- << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
- SuccToSinkTo = NewSucc;
- ++NumSplit;
- BreakPHIEdge = false;
- }
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
}
}
@@ -677,20 +719,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// BreakPHIEdge is true if all the uses are in the successor MBB being
// sunken into and they are all PHI nodes. In this case, machine-sink must
// break the critical edge first.
- MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
- SuccToSinkTo, BreakPHIEdge);
- if (!NewSucc) {
+ bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
"break critical edge\n");
- return false;
- }
-
- DEBUG(dbgs() << " *** Splitting critical edge:"
- " BB#" << ParentBlock->getNumber()
- << " -- BB#" << NewSucc->getNumber()
- << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
- SuccToSinkTo = NewSucc;
- ++NumSplit;
+ // The instruction will not be sunk this time.
+ return false;
}
// Determine where to insert into. Skip phi nodes.
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 1bbf0ad..2cf87eb 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -52,13 +52,13 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TII = MF->getTarget().getInstrInfo();
- TRI = MF->getTarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
const TargetSubtargetInfo &ST =
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
ProcResourceCycles.resize(MF->getNumBlockIDs() *
SchedModel.getNumProcResourceKinds());
@@ -135,8 +135,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
"getResources() must be called before getProcResourceCycles()");
unsigned PRKinds = SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
- return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
@@ -256,8 +255,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
- return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}
/// Get an array of processor resource heights for MBB. Indexed by processor
@@ -270,8 +268,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
- return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
@@ -452,7 +449,7 @@ public:
}
// To is a new block. Mark the block as visited in case the CFG has cycles
// that MachineLoopInfo didn't recognize as a natural loop.
- return LB.Visited.insert(To);
+ return LB.Visited.insert(To).second;
}
};
}
@@ -1169,6 +1166,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
return DepCycle;
}
+/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Find the limiting processor resource.
// Numbers have been pre-scaled to be comparable.
@@ -1185,7 +1183,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
+ /// All instructions before current block
unsigned Instrs = TBI.InstrDepth;
+ // plus instructions in current block
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
@@ -1194,44 +1194,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
return std::max(Instrs, PRMax);
}
-
-unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
- ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+ ArrayRef<const MachineBasicBlock *> Extrablocks,
+ ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+ ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
// Add up resources above and below the center block.
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
unsigned PRMax = 0;
- for (unsigned K = 0; K != PRDepths.size(); ++K) {
- unsigned PRCycles = PRDepths[K] + PRHeights[K];
- for (unsigned I = 0; I != Extrablocks.size(); ++I)
- PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
- for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
- const MCSchedClassDesc* SC = ExtraInstrs[I];
+
+ // Capture computing cycles from extra instructions
+ auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+ unsigned ResourceIdx)
+ ->unsigned {
+ unsigned Cycles = 0;
+ for (unsigned I = 0; I != Instrs.size(); ++I) {
+ const MCSchedClassDesc *SC = Instrs[I];
if (!SC->isValid())
continue;
for (TargetSchedModel::ProcResIter
- PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
- PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
- if (PI->ProcResourceIdx != K)
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != ResourceIdx)
continue;
- PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
+ Cycles +=
+ (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
}
}
+ return Cycles;
+ };
+
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ PRCycles += extraCycles(ExtraInstrs, K);
+ PRCycles -= extraCycles(RemoveInstrs, K);
PRMax = std::max(PRMax, PRCycles);
}
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
+ // Instrs: #instructions in current trace outside current block.
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ // Add instruction count from the extra blocks.
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ Instrs += ExtraInstrs.size();
+ Instrs -= RemoveInstrs.size();
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
return std::max(Instrs, PRMax);
}
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
+ const MachineInstr *UseMI) const {
+ if (DefMI->getParent() == UseMI->getParent())
+ return true;
+
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+
+ return DepTBI.isUsefulDominator(TBI);
+}
+
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
OS << getName() << " ensemble:\n";
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8515b0f..99f0583 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -46,6 +46,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
@@ -214,9 +215,9 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI);
void report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR);
+ const LiveRange &LR, unsigned Reg);
void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR);
+ const LiveRange &LR, unsigned Reg);
void verifyInlineAsm(const MachineInstr *MI);
@@ -275,11 +276,12 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
raw_ostream *OutFile = nullptr;
if (OutFileName) {
- std::string ErrorInfo;
- OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ std::error_code EC;
+ OutFile = new raw_fd_ostream(OutFileName, EC,
sys::fs::F_Append | sys::fs::F_Text);
- if (!ErrorInfo.empty()) {
- errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ if (EC) {
+ errs() << "Error opening '" << OutFileName << "': " << EC.message()
+ << '\n';
exit(1);
}
@@ -292,8 +294,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- TRI = TM->getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LiveVars = nullptr;
@@ -430,15 +432,17 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR) {
+ const LiveRange &LR, unsigned Reg) {
report(msg, MBB);
- *OS << "- liverange: " << LR << "\n";
+ *OS << "- liverange: " << LR << '\n';
+ *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR) {
+ const LiveRange &LR, unsigned Reg) {
report(msg, MF);
- *OS << "- liverange: " << LR << "\n";
+ *OS << "- liverange: " << LR << '\n';
+ *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -905,7 +909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(Reg) << " is not a "
- << DRC->getName() << " register.\n";
+ << TRI->getRegClassName(DRC) << " register.\n";
}
}
} else {
@@ -916,13 +920,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TRI->getSubClassWithSubReg(RC, SubIdx);
if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
- *OS << "Register class " << RC->getName()
+ *OS << "Register class " << TRI->getRegClassName(RC)
<< " does not support subreg index " << SubIdx << "\n";
return;
}
if (RC != SRC) {
report("Invalid register class for subregister index", MO, MONum);
- *OS << "Register class " << RC->getName()
+ *OS << "Register class " << TRI->getRegClassName(RC)
<< " does not fully support subreg index " << SubIdx << "\n";
return;
}
@@ -944,8 +948,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
- *OS << "Expected a " << DRC->getName() << " register, but got a "
- << RC->getName() << " register\n";
+ *OS << "Expected a " << TRI->getRegClassName(DRC)
+ << " register, but got a " << TRI->getRegClassName(RC)
+ << " register\n";
}
}
}
@@ -1357,13 +1362,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LR);
+ report("Valno not live at def and not marked unused", MF, LR, Reg);
*OS << "Valno #" << VNI->id << '\n';
return;
}
if (DefVNI != VNI) {
- report("Live segment at def has different valno", MF, LR);
+ report("Live segment at def has different valno", MF, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " where valno #" << DefVNI->id << " is live\n";
return;
@@ -1371,7 +1376,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LR);
+ report("Invalid definition index", MF, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LR << '\n';
return;
@@ -1379,7 +1384,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LR);
+ report("PHIDef value is not defined at MBB start", MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< ", not at the beginning of BB#" << MBB->getNumber() << '\n';
}
@@ -1389,7 +1394,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LR);
+ report("No instruction at def index", MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
return;
}
@@ -1422,12 +1427,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// DEF slots.
if (isEarlyClobber) {
if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MBB, LR);
+ report("Early clobber def must be at an early-clobber slot", MBB, LR,
+ Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
} else if (!VNI->def.isRegister()) {
report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LR);
+ MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
}
@@ -1441,31 +1447,31 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
assert(VNI && "Live segment has no valno");
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
- report("Foreign valno in live segment", MF, LR);
+ report("Foreign valno in live segment", MF, LR, Reg);
*OS << S << " has a bad valno\n";
}
if (VNI->isUnused()) {
- report("Live segment valno is marked unused", MF, LR);
+ report("Live segment valno is marked unused", MF, LR, Reg);
*OS << S << '\n';
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LR);
+ report("Bad start of live segment, no basic block", MF, LR, Reg);
*OS << S << '\n';
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LR);
+ report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg);
*OS << S << '\n';
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LR);
+ report("Bad end of live segment, no basic block", MF, LR, Reg);
*OS << S << '\n';
return;
}
@@ -1483,14 +1489,14 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const MachineInstr *MI =
LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LR);
+ report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg);
*OS << S << '\n';
return;
}
// The block slot must refer to a basic block boundary.
if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LR);
+ report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg);
*OS << S << '\n';
}
@@ -1498,7 +1504,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Segment ends on the dead slot.
// That means there must be a dead def.
if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB, LR);
+ report("Live segment ending at dead slot spans instructions", EndMBB, LR,
+ Reg);
*OS << S << '\n';
}
}
@@ -1508,7 +1515,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LR);
+ "redefined by an EC def in the same instruction", EndMBB, LR, Reg);
*OS << S << '\n';
}
}
@@ -1566,7 +1573,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LR);
+ report("Register not marked live out of predecessor", *PI, LR, Reg);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << '\n';
@@ -1575,7 +1582,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LR);
+ report("Different value live out of predecessor", *PI, LR, Reg);
*OS << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 95a2934..a1042e7 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "phi-opt"
@@ -66,7 +67,7 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
return false;
MRI = &Fn.getRegInfo();
- TII = Fn.getTarget().getInstrInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
// Find dead PHI cycles and PHI cycles that can be replaced by a single
// value. InstCombine does these optimizations, but DAG legalization may
@@ -91,7 +92,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned DstReg = MI->getOperand(0).getReg();
// See if we already saw this register.
- if (!PHIsInCycle.insert(MI))
+ if (!PHIsInCycle.insert(MI).second)
return true;
// Don't scan crazily complex things.
@@ -136,7 +137,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
"PHI destination is not a virtual register");
// See if we already saw this register.
- if (!PHIsInCycle.insert(MI))
+ if (!PHIsInCycle.insert(MI).second)
return true;
// Don't scan crazily complex things.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index c8d0819..def2e3d 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -30,7 +30,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -150,9 +150,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
Changed |= EliminatePHINodes(MF, *I);
// Remove dead IMPLICIT_DEF instructions.
- for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
- E = ImpDefs.end(); I != E; ++I) {
- MachineInstr *DefMI = *I;
+ for (MachineInstr *DefMI : ImpDefs) {
unsigned DefReg = DefMI->getOperand(0).getReg();
if (MRI->use_nodbg_empty(DefReg)) {
if (LIS)
@@ -240,7 +238,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Insert a register to register copy at the top of the current block (but
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
if (isSourceDefinedByImplicitDef(MPhi, MRI))
// If all sources of a PHI node are implicit_def, just emit an
// implicit_def instead of a copy.
@@ -369,7 +367,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
- if (!MBBsInsertedInto.insert(&opBlock))
+ if (!MBBsInsertedInto.insert(&opBlock).second)
continue; // If the copy has already been emitted, we're done.
// Find a safe location to insert the copy, this may be the first terminator
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index 48234ae..b997d7a 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
-#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 249b2d0..ec71d86 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
@@ -71,6 +72,8 @@ static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
cl::desc("Disable Codegen Prepare"));
static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
+ cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -97,6 +100,10 @@ static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
+static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -374,7 +381,10 @@ void TargetPassConfig::addIRPasses() {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
+ if (UseCFLAA)
+ addPass(createCFLAliasAnalysisPass());
addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createScopedNoAliasAAPass());
addPass(createBasicAliasAnalysisPass());
// Before running any passes, run the verifier to determine if the input
@@ -399,6 +409,9 @@ void TargetPassConfig::addIRPasses() {
// Prepare expensive constants for SelectionDAG.
if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
addPass(createConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
+ addPass(createPartiallyInlineLibCallsPass());
}
/// Turn exception handling constructs into something the code generators can
@@ -416,7 +429,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::WinEH:
+ case ExceptionHandling::ItaniumWinEH:
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
@@ -433,6 +446,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
addPass(createCodeGenPreparePass(TM));
+ addPass(createRewriteSymbolsPass());
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
@@ -601,6 +615,9 @@ void TargetPassConfig::addMachineSSAOptimization() {
printAndVerify("After Machine LICM, CSE and Sinking passes");
addPass(&PeepholeOptimizerID);
+ // Clean-up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen peephole optimization pass");
}
@@ -675,6 +692,12 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
+/// Return true if the default global register allocator is in use and
+/// has not be overriden on the command line with '-regalloc=...'
+bool TargetPassConfig::usingDefaultRegAlloc() const {
+ return RegAlloc.getNumOccurrences() == 0;
+}
+
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
@@ -709,6 +732,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&TwoAddressInstructionPassID);
addPass(&RegisterCoalescerID);
+ printAndVerify("After Register Coalescing");
// PreRA instruction scheduling.
if (addPass(&MachineSchedulerID))
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 716cb1f..a296aea 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -46,7 +46,7 @@
// if it loads to virtual registers and the virtual register defined has
// a single use.
//
-// - Optimize Copies and Bitcast:
+// - Optimize Copies and Bitcast (more generally, target specific copies):
//
// Rewrite copies and bitcasts to avoid cross register bank copies
// when possible.
@@ -78,6 +78,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "peephole-opt"
@@ -92,7 +94,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
static cl::opt<bool>
-DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
cl::desc("Disable advanced copy optimization"));
STATISTIC(NumReuse, "Number of extension results reused");
@@ -100,12 +102,13 @@ STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
-STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
+STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
+STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
- const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
@@ -129,9 +132,14 @@ namespace {
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs);
bool optimizeSelect(MachineInstr *MI);
+ bool optimizeCondBranch(MachineInstr *MI);
bool optimizeCopyOrBitcast(MachineInstr *MI);
+ bool optimizeCoalescableCopy(MachineInstr *MI);
+ bool optimizeUncoalescableCopy(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool findNextSource(unsigned &Reg, unsigned &SubReg);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -140,6 +148,25 @@ namespace {
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
+
+ /// \brief Check whether \p MI is understood by the register coalescer
+ /// but may require some rewriting.
+ bool isCoalescableCopy(const MachineInstr &MI) {
+ // SubregToRegs are not interesting, because they are already register
+ // coalescer friendly.
+ return MI.isCopy() || (!DisableAdvCopyOpt &&
+ (MI.isRegSequence() || MI.isInsertSubreg() ||
+ MI.isExtractSubreg()));
+ }
+
+ /// \brief Check whether \p MI is a copy like instruction that is
+ /// not recognized by the register coalescer.
+ bool isUncoalescableCopy(const MachineInstr &MI) {
+ return MI.isBitcast() ||
+ (!DisableAdvCopyOpt &&
+ (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()));
+ }
};
/// \brief Helper class to track the possible sources of a value defined by
@@ -176,63 +203,87 @@ namespace {
/// the ValueTracker class but that would have complicated the code of
/// the users of this class.
bool UseAdvancedTracking;
- /// Optional MachineRegisterInfo used to perform some complex
+ /// MachineRegisterInfo used to perform tracking.
+ const MachineRegisterInfo &MRI;
+ /// Optional TargetInstrInfo used to perform some complex
/// tracking.
- const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
/// \brief Dispatcher to the right underlying implementation of
/// getNextSource.
- bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for Copy instructions.
- bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for Bitcast instructions.
- bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for RegSequence
/// instructions.
- bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for InsertSubreg
/// instructions.
- bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for ExtractSubreg
/// instructions.
- bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for SubregToReg
/// instructions.
- bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
public:
- /// \brief Create a ValueTracker instance for the value defines by \p MI
- /// at the operand index \p DefIdx.
+ /// \brief Create a ValueTracker instance for the value defined by \p Reg.
/// \p DefSubReg represents the sub register index the value tracker will
- /// track. It does not need to match the sub register index used in \p MI.
+ /// track. It does not need to match the sub register index used in the
+ /// definition of \p Reg.
/// \p UseAdvancedTracking specifies whether or not the value tracker looks
/// through complex instructions. By default (false), it handles only copy
/// and bitcast instructions.
- /// \p MRI useful to perform some complex checks.
+ /// If \p Reg is a physical register, a value tracker constructed with
+ /// this constructor will not find any alternative source.
+ /// Indeed, when \p Reg is a physical register that constructor does not
+ /// know which definition of \p Reg it should track.
+ /// Use the next constructor to track a physical register.
+ ValueTracker(unsigned Reg, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
+ bool UseAdvancedTracking = false,
+ const TargetInstrInfo *TII = nullptr)
+ : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
+ }
+ }
+
+ /// \brief Create a ValueTracker instance for the value defined by
+ /// the pair \p MI, \p DefIdx.
+ /// Unlike the other constructor, the value tracker produced by this one
+ /// may be able to find a new source when the definition is a physical
+ /// register.
+ /// This could be useful to rewrite target specific instructions into
+ /// generic copy instructions.
ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
bool UseAdvancedTracking = false,
- const MachineRegisterInfo *MRI = nullptr)
+ const TargetInstrInfo *TII = nullptr)
: Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
- UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
- assert(Def->getOperand(DefIdx).isDef() &&
- Def->getOperand(DefIdx).isReg() &&
- "Definition does not match machine instruction");
- // Initially the value is in the defined register.
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ assert(DefIdx < Def->getDesc().getNumDefs() &&
+ Def->getOperand(DefIdx).isReg() && "Invalid definition");
Reg = Def->getOperand(DefIdx).getReg();
}
/// \brief Following the use-def chain, get the next available source
/// for the tracked value.
- /// When the returned value is not nullptr, getReg() gives the register
+ /// When the returned value is not nullptr, \p SrcReg gives the register
/// that contain the tracked value.
/// \note The sub register index returned in \p SrcSubReg must be used
- /// on that getReg() to access the actual value.
+ /// on \p SrcReg to access the actual value.
/// \return Unless the returned value is nullptr (i.e., no source found),
- /// \p SrcIdx gives the index of the next source in the returned
- /// instruction and \p SrcSubReg the index to be used on that source to
- /// get the tracked value. When nullptr is returned, no alternative source
- /// has been found.
- const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \p SrcReg gives the register of the next source used in the returned
+ /// instruction and \p SrcSubReg the sub-register index to be used on that
+ /// source to get the tracked value. When nullptr is returned, no
+ /// alternative source has been found.
+ const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Get the last register where the initial value can be found.
/// Initially this is the register of the definition.
@@ -261,7 +312,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
@@ -277,7 +328,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// Ensure DstReg can get a register class that actually supports
// sub-registers. Don't change the class until we commit.
const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
if (!DstRC)
return false;
@@ -286,8 +337,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// register.
// If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
// SrcReg:SubIdx should be replaced.
- bool UseSrcSubIdx = TM->getRegisterInfo()->
- getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
+ bool UseSrcSubIdx =
+ TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
@@ -447,6 +498,12 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
return true;
}
+/// \brief Check if a simpler conditional branch can be
+// generated
+bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
+ return TII->optimizeCondBranch(MI);
+}
+
/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
/// share the same register file.
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
@@ -477,85 +534,34 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-/// \brief Get the index of the definition and source for \p Copy
-/// instruction.
-/// \pre Copy.isCopy() or Copy.isBitcast().
-/// \return True if the Copy instruction has only one register source
-/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
-/// are invalid.
-static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
- unsigned &DefIdx, unsigned &SrcIdx) {
- assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
- if (Copy.isCopy()) {
- // Copy instruction are supposed to be: Def = Src.
- if (Copy.getDesc().getNumOperands() != 2)
- return false;
- DefIdx = 0;
- SrcIdx = 1;
- assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
- return true;
- }
- // Bitcast case.
- // Bitcasts with more than one def are not supported.
- if (Copy.getDesc().getNumDefs() != 1)
- return false;
- // Initialize SrcIdx to an undefined operand.
- SrcIdx = Copy.getDesc().getNumOperands();
- for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
- const MachineOperand &MO = Copy.getOperand(OpIdx);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef())
- DefIdx = OpIdx;
- else if (SrcIdx != EndOpIdx)
- // Multiple sources?
- return false;
- SrcIdx = OpIdx;
- }
- return true;
-}
-
-/// \brief Optimize a copy or bitcast instruction to avoid cross
-/// register bank copy. The optimization looks through a chain of
-/// copies and try to find a source that has a compatible register
-/// class.
-/// Two register classes are considered to be compatible if they share
-/// the same register bank.
-/// New copies issued by this optimization are register allocator
-/// friendly. This optimization does not remove any copy as it may
-/// overconstraint the register allocator, but replaces some when
-/// possible.
-/// \pre \p MI is a Copy (MI->isCopy() is true)
-/// \return True, when \p MI has been optimized. In that case, \p MI has
-/// been removed from its parent.
-bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
- unsigned DefIdx, SrcIdx;
- if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
+/// \brief Try to find the next source that share the same register file
+/// for the value defined by \p Reg and \p SubReg.
+/// When true is returned, \p Reg and \p SubReg are updated with the
+/// register number and sub-register index of the new source.
+/// \return False if no alternative sources are available. True otherwise.
+bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
+ // Do not try to find a new source for a physical register.
+ // So far we do not have any motivating example for doing that.
+ // Thus, instead of maintaining untested code, we will revisit that if
+ // that changes at some point.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
- const MachineOperand &MODef = MI->getOperand(DefIdx);
- assert(MODef.isReg() && "Copies must be between registers.");
- unsigned Def = MODef.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(Def))
- return false;
-
- const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
- unsigned DefSubReg = MODef.getSubReg();
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+ unsigned DefSubReg = SubReg;
unsigned Src;
unsigned SrcSubReg;
bool ShouldRewrite = false;
- const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
// Follow the chain of copies until we reach the top of the use-def chain
// or find a more suitable source.
- ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
+ ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
do {
- unsigned CopySrcIdx, CopySrcSubReg;
- if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
+ unsigned CopySrcReg, CopySrcSubReg;
+ if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
break;
- Src = ValTracker.getReg();
+ Src = CopySrcReg;
SrcSubReg = CopySrcSubReg;
// Do not extend the live-ranges of physical registers as they add
@@ -569,29 +575,411 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
// If this source does not incur a cross register bank copy, use it.
- ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
+ ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
SrcSubReg);
} while (!ShouldRewrite);
// If we did not find a more suitable source, there is nothing to optimize.
- if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
+ if (!ShouldRewrite || Src == Reg)
return false;
- // Rewrite the copy to avoid a cross register bank penalty.
- unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
- MRI->createVirtualRegister(DefRC);
- MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
- .addReg(Src, 0, SrcSubReg);
- NewCopy->getOperand(0).setSubReg(DefSubReg);
-
- MRI->replaceRegWith(Def, NewVR);
- MRI->clearKillFlags(NewVR);
- // We extended the lifetime of Src.
- // Clear the kill flags to account for that.
- MRI->clearKillFlags(Src);
+ Reg = Src;
+ SubReg = SrcSubReg;
+ return true;
+}
+
+namespace {
+/// \brief Helper class to rewrite the arguments of a copy-like instruction.
+class CopyRewriter {
+protected:
+ /// The copy-like instruction.
+ MachineInstr &CopyLike;
+ /// The index of the source being rewritten.
+ unsigned CurrentSrcIdx;
+
+public:
+ CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
+
+ virtual ~CopyRewriter() {}
+
+ /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
+ /// the related value that it affects (TrackReg, TrackSubReg).
+ /// A source is considered rewritable if its register class and the
+ /// register class of the related TrackReg may not be register
+ /// coalescer friendly. In other words, given a copy-like instruction
+ /// not all the arguments may be returned at rewritable source, since
+ /// some arguments are none to be register coalescer friendly.
+ ///
+ /// Each call of this method moves the current source to the next
+ /// rewritable source.
+ /// For instance, let CopyLike be the instruction to rewrite.
+ /// CopyLike has one definition and one source:
+ /// dst.dstSubIdx = CopyLike src.srcSubIdx.
+ ///
+ /// The first call will give the first rewritable source, i.e.,
+ /// the only source this instruction has:
+ /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
+ /// This source defines the whole definition, i.e.,
+ /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
+ ///
+ /// The second and subsequent calls will return false, has there is only one
+ /// rewritable source.
+ ///
+ /// \return True if a rewritable source has been found, false otherwise.
+ /// The output arguments are valid if and only if true is returned.
+ virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) {
+ // If CurrentSrcIdx == 1, this means this function has already been
+ // called once. CopyLike has one defintiion and one argument, thus,
+ // there is nothing else to rewrite.
+ if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
+ return false;
+ // This is the first call to getNextRewritableSource.
+ // Move the CurrentSrcIdx to remember that we made that call.
+ CurrentSrcIdx = 1;
+ // The rewritable source is the argument.
+ const MachineOperand &MOSrc = CopyLike.getOperand(1);
+ SrcReg = MOSrc.getReg();
+ SrcSubReg = MOSrc.getSubReg();
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
+ /// if possible.
+ /// \return True if the rewritting was possible, false otherwise.
+ virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
+ if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
+ return false;
+ MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
+ MOSrc.setReg(NewReg);
+ MOSrc.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for INSERT_SUBREG instruction.
+class InsertSubregRewriter : public CopyRewriter {
+public:
+ InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isInsertSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
+ /// Src1 has the same register class has dst, hence, there is
+ /// nothing to rewrite.
+ /// Src2.src2SubIdx, may not be register coalescer friendly.
+ /// Therefore, the first call to this method returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx).
+ ///
+ /// Subsequence calls will return false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 2)
+ return false;
+ // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
+ CurrentSrcIdx = 2;
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
+ SrcReg = MOInsertedReg.getReg();
+ SrcSubReg = MOInsertedReg.getSubReg();
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+
+ // We want to track something that is compatible with the
+ // partial definition.
+ TrackReg = MODef.getReg();
+ if (MODef.getSubReg())
+ // Bails if we have to compose sub-register indices.
+ return false;
+ TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
+ return true;
+ }
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ if (CurrentSrcIdx != 2)
+ return false;
+ // We are rewriting the inserted reg.
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
+class ExtractSubregRewriter : public CopyRewriter {
+ const TargetInstrInfo &TII;
+
+public:
+ ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
+ : CopyRewriter(MI), TII(TII) {
+ assert(MI.isExtractSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
+ /// There is only one rewritable source: Src.subIdx,
+ /// which defines dst.dstSubIdx.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 1)
+ return false;
+ // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
+ CurrentSrcIdx = 1;
+ const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
+ SrcReg = MOExtractedReg.getReg();
+ // If we have to compose sub-register indices, bails out.
+ if (MOExtractedReg.getSubReg())
+ return false;
+
+ SrcSubReg = CopyLike.getOperand(2).getImm();
+
+ // We want to track something that is compatible with the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // The only source we can rewrite is the input register.
+ if (CurrentSrcIdx != 1)
+ return false;
+
+ CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+
+ // If we find a source that does not require to extract something,
+ // rewrite the operation with a copy.
+ if (!NewSubReg) {
+ // Move the current index to an invalid position.
+ // We do not want another call to this method to be able
+ // to do any change.
+ CurrentSrcIdx = -1;
+ // Rewrite the operation as a COPY.
+ // Get rid of the sub-register index.
+ CopyLike.RemoveOperand(2);
+ // Morph the operation into a COPY.
+ CopyLike.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
+ CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public CopyRewriter {
+public:
+ RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isRegSequence() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
+ /// Each call will return a different source, walking all the available
+ /// source.
+ ///
+ /// The first call returns:
+ /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx1).
+ ///
+ /// The second call returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx2).
+ ///
+ /// And so on, until all the sources have been traversed, then
+ /// it returns false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+
+ // If this is the first call, move to the first argument.
+ if (CurrentSrcIdx == 0) {
+ CurrentSrcIdx = 1;
+ } else {
+ // Otherwise, move to the next argument and check that it is valid.
+ CurrentSrcIdx += 2;
+ if (CurrentSrcIdx >= CopyLike.getNumOperands())
+ return false;
+ }
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
+ SrcReg = MOInsertedReg.getReg();
+ // If we have to compose sub-register indices, bails out.
+ if ((SrcSubReg = MOInsertedReg.getSubReg()))
+ return false;
+
+ // We want to track something that is compatible with the related
+ // partial definition.
+ TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
+
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ // If we have to compose sub-registers, bails.
+ return MODef.getSubReg() == 0;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // We cannot rewrite out of bound operands.
+ // Moreover, rewritable sources are at odd positions.
+ if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
+ return false;
+
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+} // End namespace.
+
+/// \brief Get the appropriated CopyRewriter for \p MI.
+/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
+/// if no rewriter works for \p MI.
+static CopyRewriter *getCopyRewriter(MachineInstr &MI,
+ const TargetInstrInfo &TII) {
+ switch (MI.getOpcode()) {
+ default:
+ return nullptr;
+ case TargetOpcode::COPY:
+ return new CopyRewriter(MI);
+ case TargetOpcode::INSERT_SUBREG:
+ return new InsertSubregRewriter(MI);
+ case TargetOpcode::EXTRACT_SUBREG:
+ return new ExtractSubregRewriter(MI, TII);
+ case TargetOpcode::REG_SEQUENCE:
+ return new RegSequenceRewriter(MI);
+ }
+ llvm_unreachable(nullptr);
+}
+
+/// \brief Optimize generic copy instructions to avoid cross
+/// register bank copy. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstraint the register allocator, but replaces some operands
+/// when possible.
+/// \pre isCoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been rewritten. False otherwise.
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
+ assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
+ assert(MI->getDesc().getNumDefs() == 1 &&
+ "Coalescer can understand multiple defs?!");
+ const MachineOperand &MODef = MI->getOperand(0);
+ // Do not rewrite physical definitions.
+ if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ return false;
+
+ bool Changed = false;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
+ // If none exists, bails out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source.
+ unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
+ while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
+ TrackSubReg)) {
+ unsigned NewSrc = TrackReg;
+ unsigned NewSubReg = TrackSubReg;
+ // Try to find a more suitable source.
+ // If we failed to do so, or get the actual source,
+ // move to the next source.
+ if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
+ continue;
+ // Rewrite source.
+ if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
+ // We may have extended the live-range of NewSrc, account for that.
+ MRI->clearKillFlags(NewSrc);
+ Changed = true;
+ }
+ }
+ // TODO: We could have a clean-up method to tidy the instruction.
+ // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
+ // => v0 = COPY v1
+ // Currently we haven't seen motivating example for that and we
+ // want to avoid untested code.
+ NumRewrittenCopies += Changed == true;
+ return Changed;
+}
+
+/// \brief Optimize copy-like instructions to create
+/// register coalescer friendly instruction.
+/// The optimization tries to kill-off the \p MI by looking
+/// through a chain of copies to find a source that has a compatible
+/// register class.
+/// If such a source is found, it replace \p MI by a generic COPY
+/// operation.
+/// \pre isUncoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+/// All COPY instructions created, are inserted in \p LocalMIs.
+bool PeepholeOptimizer::optimizeUncoalescableCopy(
+ MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
+
+ // Check if we can rewrite all the values defined by this instruction.
+ SmallVector<
+ std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
+ 4> RewritePairs;
+ for (const MachineOperand &MODef : MI->defs()) {
+ if (MODef.isDead())
+ // We can ignore those.
+ continue;
+
+ // If a physical register is here, this is probably for a good reason.
+ // Do not rewrite that.
+ if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ return false;
+
+ // If we do not know how to rewrite this definition, there is no point
+ // in trying to kill this instruction.
+ TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
+ TargetInstrInfo::RegSubRegPair Src = Def;
+ if (!findNextSource(Src.Reg, Src.SubReg))
+ return false;
+ RewritePairs.push_back(std::make_pair(Def, Src));
+ }
+ // The change is possible for all defs, do it.
+ for (const auto &PairDefSrc : RewritePairs) {
+ const auto &Def = PairDefSrc.first;
+ const auto &Src = PairDefSrc.second;
+ // Rewrite the "copy" in a way the register coalescer understands.
+ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ "We do not rewrite physical registers");
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
+ unsigned NewVR = MRI->createVirtualRegister(DefRC);
+ MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ NewVR).addReg(Src.Reg, 0, Src.SubReg);
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ if (Def.SubReg)
+ NewCopy->getOperand(0).setIsUndef();
+ LocalMIs.insert(NewCopy);
+ MRI->replaceRegWith(Def.Reg, NewVR);
+ MRI->clearKillFlags(NewVR);
+ // We extended the lifetime of Src.
+ // Clear the kill flags to account for that.
+ MRI->clearKillFlags(Src.Reg);
+ }
+ // MI is now dead.
MI->eraseFromParent();
- ++NumCopiesBitcasts;
+ ++NumUncoalescableCopies;
return true;
}
@@ -673,8 +1061,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
@@ -684,7 +1072,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
bool SeenMoveImm = false;
- SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallPtrSet<MachineInstr*, 16> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
@@ -711,7 +1099,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->mayStore() || MI->isCall())
FoldAsLoadDefCandidates.clear();
- if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
+ if ((isUncoalescableCopy(*MI) &&
+ optimizeUncoalescableCopy(MI, LocalMIs)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
(MI->isSelect() && optimizeSelect(MI))) {
// MI is deleted.
@@ -720,6 +1109,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
+ Changed = true;
+ continue;
+ }
+
+ if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
+ // MI is just rewritten.
+ Changed = true;
+ continue;
+ }
+
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
@@ -781,24 +1181,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isCopy() && "Invalid definition");
// Copy instruction are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
- assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+ assert(Def->getNumOperands() == 2 && "Invalid number of operands");
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
// Bails as we do not support composing subreg yet.
return false;
// Otherwise, we want the whole source.
- SrcIdx = 1;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ const MachineOperand &Src = Def->getOperand(1);
+ SrcReg = Src.getReg();
+ SrcSubReg = Src.getSubReg();
return true;
}
-bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isBitcast() && "Invalid definition");
@@ -814,7 +1215,7 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
// Bails as we do not support composing subreg yet.
return false;
- SrcIdx = Def->getDesc().getNumOperands();
+ unsigned SrcIdx = Def->getNumOperands();
for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
++OpIdx) {
const MachineOperand &MO = Def->getOperand(OpIdx);
@@ -826,13 +1227,16 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
return false;
SrcIdx = OpIdx;
}
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ const MachineOperand &Src = Def->getOperand(SrcIdx);
+ SrcReg = Src.getReg();
+ SrcSubReg = Src.getSubReg();
return true;
}
-bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isRegSequence() && "Invalid definition");
+ assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
+ "Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
// If we are composing subreg, bails out.
@@ -851,19 +1255,26 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
// turn that into an assertion.
return false;
+ if (!TII)
+ // We could handle the REG_SEQUENCE here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
+ if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
+ return false;
+
// We are looking at:
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
// Check if one of the operand defines the subreg we are interested in.
- for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
- OpIdx != EndOpIdx; OpIdx += 2) {
- const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
- assert(MOSubIdx.isImm() &&
- "One of the subindex of the reg_sequence is not an immediate");
- if (MOSubIdx.getImm() == DefSubReg) {
- assert(Def->getOperand(OpIdx).isReg() &&
- "One of the source of the reg_sequence is not a register");
- SrcIdx = OpIdx;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ for (auto &RegSeqInput : RegSeqInputRegs) {
+ if (RegSeqInput.SubIdx == DefSubReg) {
+ if (RegSeqInput.SubReg)
+ // Bails if we have to compose sub registers.
+ return false;
+
+ SrcReg = RegSeqInput.Reg;
+ SrcSubReg = RegSeqInput.SubReg;
return true;
}
}
@@ -874,61 +1285,68 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
return false;
}
-bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isInsertSubreg() && "Invalid definition");
+ assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
+ "Invalid definition");
+
if (Def->getOperand(DefIdx).getSubReg())
// If we are composing subreg, bails out.
// Same remark as getNextSourceFromRegSequence.
// I.e., this may be turned into an assert.
return false;
+ if (!TII)
+ // We could handle the REG_SEQUENCE here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ TargetInstrInfo::RegSubRegPair BaseReg;
+ TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
+ if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
+ return false;
+
// We are looking at:
// Def = INSERT_SUBREG v0, v1, sub1
// There are two cases:
// 1. DefSubReg == sub1, get v1.
// 2. DefSubReg != sub1, the value may be available through v0.
- // #1 Check if the inserted register matches the require sub index.
- unsigned InsertedSubReg = Def->getOperand(3).getImm();
- if (InsertedSubReg == DefSubReg) {
- SrcIdx = 2;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ // #1 Check if the inserted register matches the required sub index.
+ if (InsertedReg.SubIdx == DefSubReg) {
+ SrcReg = InsertedReg.Reg;
+ SrcSubReg = InsertedReg.SubReg;
return true;
}
// #2 Otherwise, if the sub register we are looking for is not partial
// defined by the inserted element, we can look through the main
// register (v0).
- // To check the overlapping we need a MRI and a TRI.
- if (!MRI)
- return false;
-
const MachineOperand &MODef = Def->getOperand(DefIdx);
- const MachineOperand &MOBase = Def->getOperand(1);
// If the result register (Def) and the base register (v0) do not
// have the same register class or if we have to compose
// subregisters, bails out.
- if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
- MOBase.getSubReg())
+ if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
+ BaseReg.SubReg)
return false;
- // Get the TRI and check if inserted sub register overlaps with the
- // sub register we are tracking.
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ // Get the TRI and check if the inserted sub-register overlaps with the
+ // sub-register we are tracking.
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!TRI ||
(TRI->getSubRegIndexLaneMask(DefSubReg) &
- TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
return false;
// At this point, the value is available in v0 via the same subreg
// we used for Def.
- SrcIdx = 1;
+ SrcReg = BaseReg.Reg;
SrcSubReg = DefSubReg;
return true;
}
-bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isExtractSubreg() && "Invalid definition");
+ assert((Def->isExtractSubreg() ||
+ Def->isExtractSubregLike()) && "Invalid definition");
// We are looking at:
// Def = EXTRACT_SUBREG v0, sub0
@@ -937,17 +1355,26 @@ bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
if (DefSubReg)
return false;
+ if (!TII)
+ // We could handle the EXTRACT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
+ if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
+ return false;
+
// Bails if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
- if (Def->getOperand(1).getSubReg())
+ if (ExtractSubregInputReg.SubReg)
return false;
// Otherwise, the value is available in the v0.sub0.
- SrcIdx = 1;
- SrcSubReg = Def->getOperand(2).getImm();
+ SrcReg = ExtractSubregInputReg.Reg;
+ SrcSubReg = ExtractSubregInputReg.SubIdx;
return true;
}
-bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isSubregToReg() && "Invalid definition");
// We are looking at:
@@ -964,37 +1391,37 @@ bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
if (Def->getOperand(2).getSubReg())
return false;
- SrcIdx = 2;
+ SrcReg = Def->getOperand(2).getReg();
SrcSubReg = Def->getOperand(3).getImm();
return true;
}
-bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
assert(Def && "This method needs a valid definition");
assert(
(DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
if (Def->isCopy())
- return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+ return getNextSourceFromCopy(SrcReg, SrcSubReg);
if (Def->isBitcast())
- return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+ return getNextSourceFromBitcast(SrcReg, SrcSubReg);
// All the remaining cases involve "complex" instructions.
// Bails if we did not ask for the advanced tracking.
if (!UseAdvancedTracking)
return false;
- if (Def->isRegSequence())
- return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
- if (Def->isInsertSubreg())
- return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
- if (Def->isExtractSubreg())
- return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+ if (Def->isRegSequence() || Def->isRegSequenceLike())
+ return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
+ if (Def->isInsertSubreg() || Def->isInsertSubregLike())
+ return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
+ if (Def->isExtractSubreg() || Def->isExtractSubregLike())
+ return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
if (Def->isSubregToReg())
- return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+ return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
return false;
}
-const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
unsigned &SrcSubReg) {
// If we reach a point where we cannot move up in the use-def chain,
// there is nothing we can get.
@@ -1003,20 +1430,18 @@ const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
const MachineInstr *PrevDef = nullptr;
// Try to find the next source.
- if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+ if (getNextSourceImpl(SrcReg, SrcSubReg)) {
// Update definition, definition index, and subregister for the
// next call of getNextSource.
- const MachineOperand &MO = Def->getOperand(SrcIdx);
- assert(MO.isReg() && !MO.isDef() && "Source is invalid");
// Update the current register.
- Reg = MO.getReg();
+ Reg = SrcReg;
// Update the return value before moving up in the use-def chain.
PrevDef = Def;
// If we can still move up in the use-def chain, move to the next
// defintion.
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
- Def = MRI->getVRegDef(Reg);
- DefIdx = MRI->def_begin(Reg).getOperandNo();
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
DefSubReg = SrcSubReg;
return PrevDef;
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index db3933e..89e1d11 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -41,7 +41,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -98,6 +97,11 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ bool enablePostRAScheduler(
+ const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
@@ -132,10 +136,10 @@ namespace {
public:
SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo&,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
~SchedulePostRATDList();
@@ -188,16 +192,17 @@ INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo &RCI,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
-
- const TargetMachine &TM = MF.getTarget();
- const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+
+ const InstrItineraryData *InstrItins =
+ MF.getSubtarget().getInstrItineraryData();
HazardRec =
- TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
+ InstrItins, this);
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
@@ -245,13 +250,23 @@ void SchedulePostRATDList::dumpSchedule() const {
}
#endif
+bool PostRAScheduler::enablePostRAScheduler(
+ const TargetSubtargetInfo &ST,
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+ Mode = ST.getAntiDepBreakMode();
+ ST.getCriticalPathRCs(CriticalPathRCs);
+ return ST.enablePostMachineScheduler() &&
+ OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipOptnoneFunction(*Fn.getFunction()))
return false;
- TII = Fn.getTarget().getInstrInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -267,9 +282,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
- CriticalPathRCs))
+ const TargetSubtargetInfo &ST =
+ Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+ AntiDepMode, CriticalPathRCs))
return false;
}
@@ -284,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "PostRAScheduler\n");
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
// Loop over all of the basic blocks
@@ -543,10 +559,10 @@ void SchedulePostRATDList::ListScheduleTopDown() {
if (HT == ScheduleHazardRecognizer::NoHazard) {
if (HazardRec->ShouldPreferAnother(CurSUnit)) {
if (!NotPreferredSUnit) {
- // If this is the first non-preferred node for this cycle, then
- // record it and continue searching for a preferred node. If this
- // is not the first non-preferred node, then treat it as though
- // there had been a hazard.
+ // If this is the first non-preferred node for this cycle, then
+ // record it and continue searching for a preferred node. If this
+ // is not the first non-preferred node, then treat it as though
+ // there had been a hazard.
NotPreferredSUnit = CurSUnit;
continue;
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 3129927..b153800 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -138,8 +139,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
assert(WorkList.empty() && "Inconsistent worklist state");
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index b98d210..06530b9 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -110,8 +111,8 @@ typedef SmallSetVector<int, 8> StackObjSet;
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const Function* F = Fn.getFunction();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
@@ -185,8 +186,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
@@ -239,8 +240,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
- const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = F.getFrameInfo();
// Get the callee saved register list...
@@ -337,9 +338,9 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
if (CSI.empty())
return;
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
MachineBasicBlock::iterator I;
// Spill using target interface.
@@ -445,7 +446,7 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
StackProtector *SP = &getAnalysis<StackProtector>();
bool StackGrowsDown =
@@ -515,7 +516,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
TFI.isFPCloseToIncomingSP() &&
RegInfo->useFPForScavengingIndex(Fn) &&
@@ -670,7 +671,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
/// prolog and epilog code to the function.
///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Add prologue to the function...
TFI.emitPrologue(Fn);
@@ -710,8 +711,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
// Iterate over the reachable blocks in DFS order.
- for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
- DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
DFI != DFE; ++DFI) {
int SPAdj = 0;
// Check the exit state of the DFS stack predecessor.
@@ -738,11 +738,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
int &SPAdj) {
- const TargetMachine &TM = Fn.getTarget();
- assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
+ assert(Fn.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
@@ -837,7 +837,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
/// iterate over the vreg use list, which at this point only contains machine
/// operands for which eliminateFrameIndex need a new scratch reg.
-void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+void
+PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
@@ -888,12 +889,16 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
+ MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Make sure MRI now accounts this register as used.
+ MRI.setPhysRegUsed(ScratchReg);
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
- RS->setUsed(ScratchReg);
+ RS->setRegUsed(ScratchReg);
}
}
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 5a6d39a..f88b8ef 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -16,8 +16,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_PEI_H
-#define LLVM_CODEGEN_PEI_H
+#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
+#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SparseBitVector.h"
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 12b2c90..b1c341d 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -107,13 +107,9 @@ bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
}
bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
- // Negative frame indices are used for special things that don't
- // appear in LLVM IR. Non-negative indices may be used for things
- // like static allocas.
if (!MFI)
- return FI >= 0;
- // Spill slots should not alias others.
- return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+ return true;
+ return MFI->isAliasedObjectIndex(FI);
}
bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 894aee7..122afd1 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#ifndef NDEBUG
#include "llvm/ADT/SparseBitVector.h"
@@ -102,7 +101,7 @@ void RegAllocBase::allocatePhysRegs() {
// register if possible and populate a list of new live intervals that
// result from splitting.
DEBUG(dbgs() << "\nselectOrSplit "
- << MRI->getRegClass(VirtReg->reg)->getName()
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
<< ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
typedef SmallVector<unsigned, 4> VirtRegVec;
VirtRegVec SplitVRegs;
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index b333c36..bbd79cd 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -34,8 +34,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_REGALLOCBASE
-#define LLVM_CODEGEN_REGALLOCBASE
+#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
+#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -106,4 +106,4 @@ private:
} // end namespace llvm
-#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
+#endif
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index b722098..0090332 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -33,7 +33,6 @@
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
@@ -157,7 +156,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
}
void RABasic::releaseMemory() {
- SpillerInstance.reset(nullptr);
+ SpillerInstance.reset();
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 97b9f76..8fc10b4 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -53,7 +53,6 @@ namespace {
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
isBulkSpilling(false) {}
private:
- const TargetMachine *TM;
MachineFunction *MF;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
@@ -298,7 +297,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
- const MDNode *MDPtr = DBG->getOperand(2).getMetadata();
+ const MDNode *Var = DBG->getDebugVariable();
+ const MDNode *Expr = DBG->getDebugExpression();
bool IsIndirect = DBG->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
DebugLoc DL;
@@ -308,10 +308,13 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
DL = (--EI)->getDebugLoc();
} else
DL = MI->getDebugLoc();
- MachineBasicBlock *MBB = DBG->getParent();
MachineInstr *NewDV =
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
}
@@ -545,7 +548,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
- << RC->getName() << "\n");
+ << TRI->getRegClassName(RC) << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
@@ -705,7 +708,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
continue;
if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
(MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
- if (ThroughRegs.insert(Reg))
+ if (ThroughRegs.insert(Reg).second)
DEBUG(dbgs() << ' ' << PrintReg(Reg));
}
}
@@ -862,13 +865,16 @@ void RAFast::AllocateBasicBlock() {
// Modify DBG_VALUE now that the value is in a spill slot.
bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *MDPtr =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(SS)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
DEBUG(dbgs() << "Modifying debug info due to spill:"
<< "\t" << *NewDV);
// Scan NewDV operands from the beginning.
@@ -1070,9 +1076,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
<< "********** Function: " << Fn.getName() << '\n');
MF = &Fn;
MRI = &MF->getRegInfo();
- TM = &Fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.clear();
@@ -1093,9 +1098,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
}
// Add the clobber lists for all the instructions we skipped earlier.
- for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
- I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const uint16_t *Defs = (*I)->getImplicitDefs())
+ for (const MCInstrDesc *Desc : SkippedInstrs)
+ if (const uint16_t *Defs = Desc->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 901b993..8ef5dcd 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -486,7 +486,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
}
void RAGreedy::releaseMemory() {
- SpillerInstance.reset(nullptr);
+ SpillerInstance.reset();
ExtraRegInfo.clear();
GlobalCand.clear();
}
@@ -514,7 +514,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
- bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() &&
+ bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
@@ -817,7 +817,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
unsigned MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
- DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
<< ", no cheaper registers to be found.\n");
return 0;
}
@@ -967,14 +967,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
BCS[B].Exit = SpillPlacement::PrefSpill;
if (++B == GroupSize) {
- ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
- SpillPlacer->addConstraints(Array);
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
B = 0;
}
}
- ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
- SpillPlacer->addConstraints(Array);
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
}
@@ -1013,7 +1011,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
- ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
if (Cand.PhysReg)
addThroughConstraints(Cand.Intf, NewBlocks);
else
@@ -1791,9 +1789,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// instructions.
//
// Try to guess the size of the new interval.
- const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
- Uses[SplitBefore].distance(Uses[SplitAfter]) +
- (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
+ const float EstWeight = normalizeSpillWeight(
+ blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
+ 1);
// Would this split be possible to allocate?
// Never allocate all gaps, we wouldn't be making progress.
DEBUG(dbgs() << " w=" << EstWeight);
@@ -2319,13 +2319,13 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- const TargetMachine &TM = MF->getTarget();
- TRI = TM.getRegisterInfo();
- TII = TM.getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
RCI.runOnMachineFunction(mf);
EnableLocalReassign = EnableLocalReassignment ||
- TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+ MF->getSubtarget().enableRALocalReassignment(
+ MF->getTarget().getOptLevel());
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index b8d2325..eb7e5633 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -49,9 +49,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <limits>
#include <memory>
+#include <queue>
#include <set>
#include <sstream>
#include <vector>
@@ -61,17 +62,17 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
static RegisterRegAlloc
-registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
createDefaultPBQPRegisterAllocator);
static cl::opt<bool>
-pbqpCoalescing("pbqp-coalescing",
+PBQPCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
#ifndef NDEBUG
static cl::opt<bool>
-pbqpDumpGraphs("pbqp-dump-graphs",
+PBQPDumpGraphs("pbqp-dump-graphs",
cl::desc("Dump graphs for each function/round in the compilation unit."),
cl::init(false), cl::Hidden);
#endif
@@ -88,8 +89,8 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=nullptr)
- : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) {
+ RegAllocPBQP(char *cPassID = nullptr)
+ : MachineFunctionPass(ID), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
@@ -117,301 +118,320 @@ private:
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
typedef std::set<unsigned> RegSet;
- std::unique_ptr<PBQPBuilder> builder;
-
char *customPassID;
- MachineFunction *mf;
- const TargetMachine *tm;
- const TargetRegisterInfo *tri;
- const TargetInstrInfo *tii;
- MachineRegisterInfo *mri;
- const MachineBlockFrequencyInfo *mbfi;
-
- std::unique_ptr<Spiller> spiller;
- LiveIntervals *lis;
- LiveStacks *lss;
- VirtRegMap *vrm;
-
- RegSet vregsToAlloc, emptyIntervalVRegs;
+ RegSet VRegsToAlloc, EmptyIntervalVRegs;
/// \brief Finds the initial set of vreg intervals to allocate.
- void findVRegIntervalsToAlloc();
+ void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
+
+ /// \brief Constructs an initial graph.
+ void initializeGraph(PBQPRAGraph &G);
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
- bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
- const PBQP::Solution &solution);
+ bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller);
/// \brief Postprocessing before final spilling. Sets basic block "live in"
/// variables.
- void finalizeAlloc() const;
+ void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM) const;
};
char RegAllocPBQP::ID = 0;
-} // End anonymous namespace.
-
-unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const {
- Node2VReg::const_iterator vregItr = node2VReg.find(node);
- assert(vregItr != node2VReg.end() && "No vreg for node.");
- return vregItr->second;
-}
-
-PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
- VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
- assert(nodeItr != vreg2Node.end() && "No node for vreg.");
- return nodeItr->second;
-
-}
+/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // A minimum spill costs, so that register constraints can can be set
+ // without normalization in the [0.0:MinSpillCost( interval.
+ const PBQP::PBQPNum MinSpillCost = 10.0;
+
+ for (auto NId : G.nodeIds()) {
+ PBQP::PBQPNum SpillCost =
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+ if (SpillCost == 0.0)
+ SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+ else
+ SpillCost += MinSpillCost;
+ PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+ NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+ G.setNodeCosts(NId, std::move(NodeCosts));
+ }
+ }
+};
-const PBQPRAProblem::AllowedSet&
- PBQPRAProblem::getAllowedSet(unsigned vreg) const {
- AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
- assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
- const AllowedSet &allowedSet = allowedSetItr->second;
- return allowedSet;
-}
+/// @brief Add interference edges between overlapping vregs.
+class Interference : public PBQPRAConstraint {
+private:
-unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
- assert(isPRegOption(vreg, option) && "Not a preg option.");
+private:
- const AllowedSet& allowedSet = getAllowedSet(vreg);
- assert(option <= allowedSet.size() && "Option outside allowed set.");
- return allowedSet[option - 1];
-}
+ typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
+ typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
-PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
- const MachineBlockFrequencyInfo *mbfi,
- const RegSet &vregs) {
+ // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
+ // for the fast interference graph construction algorithm. The last is there
+ // to save us from looking up node ids via the VRegToNode map in the graph
+ // metadata.
+ typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
+ IntervalInfo;
- LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
- MachineRegisterInfo *mri = &mf->getRegInfo();
- const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ static SlotIndex getStartPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].start;
+ }
- std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem());
- PBQPRAGraph &g = p->getGraph();
- RegSet pregs;
+ static SlotIndex getEndPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].end;
+ }
- // Collect the set of preg intervals, record that they're used in the MF.
- for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
- if (mri->def_empty(Reg))
- continue;
- pregs.insert(Reg);
- mri->setPhysRegUsed(Reg);
+ static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
+ return std::get<2>(I);
}
- // Iterate over vregs.
- for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
- vregItr != vregEnd; ++vregItr) {
- unsigned vreg = *vregItr;
- const TargetRegisterClass *trc = mri->getRegClass(vreg);
- LiveInterval *vregLI = &LIS->getInterval(vreg);
+ static bool lowestStartPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ // Condition reversed because priority queue has the *highest* element at
+ // the front, rather than the lowest.
+ return getStartPoint(I1) > getStartPoint(I2);
+ }
- // Record any overlaps with regmask operands.
- BitVector regMaskOverlaps;
- LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
+ static bool lowestEndPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ SlotIndex E1 = getEndPoint(I1);
+ SlotIndex E2 = getEndPoint(I2);
- // Compute an initial allowed set for the current vreg.
- typedef std::vector<unsigned> VRAllowed;
- VRAllowed vrAllowed;
- ArrayRef<MCPhysReg> rawOrder = trc->getRawAllocationOrder(*mf);
- for (unsigned i = 0; i != rawOrder.size(); ++i) {
- unsigned preg = rawOrder[i];
- if (mri->isReserved(preg))
- continue;
+ if (E1 < E2)
+ return true;
- // vregLI crosses a regmask operand that clobbers preg.
- if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
- continue;
+ if (E1 > E2)
+ return false;
- // vregLI overlaps fixed regunit interference.
- bool Interference = false;
- for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
- if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
- Interference = true;
- break;
- }
- }
- if (Interference)
- continue;
-
- // preg is usable for this virtual register.
- vrAllowed.push_back(preg);
- }
-
- PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0);
+ // If two intervals end at the same point, we need a way to break the tie or
+ // the set will assume they're actually equal and refuse to insert a
+ // "duplicate". Just compare the vregs - fast and guaranteed unique.
+ return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+ }
- PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
- vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+ static bool isAtLastSegment(const IntervalInfo &I) {
+ return std::get<1>(I) == std::get<0>(I)->size() - 1;
+ }
- addSpillCosts(nodeCosts, spillCost);
+ static IntervalInfo nextSegment(const IntervalInfo &I) {
+ return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
+ }
- // Construct the node.
- PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts));
+public:
- // Record the mapping and allowed set in the problem.
- p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end());
+ void apply(PBQPRAGraph &G) override {
+ // The following is loosely based on the linear scan algorithm introduced in
+ // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
+ // isn't linear, because the size of the active set isn't bound by the
+ // number of registers, but rather the size of the largest clique in the
+ // graph. Still, we expect this to be better than N^2.
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // Interferenc matrices are incredibly regular - they're only a function of
+ // the allowed sets, so we cache them to avoid the overhead of constructing
+ // and uniquing them.
+ IMatrixCache C;
+
+ typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
+ typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)> IntervalQueue;
+ IntervalSet Active(lowestEndPoint);
+ IntervalQueue Inactive(lowestStartPoint);
+
+ // Start by building the inactive set.
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ LiveInterval &LI = LIS.getInterval(VReg);
+ assert(!LI.empty() && "PBQP graph contains node for empty interval");
+ Inactive.push(std::make_tuple(&LI, 0, NId));
+ }
- }
+ while (!Inactive.empty()) {
+ // Tentatively grab the "next" interval - this choice may be overriden
+ // below.
+ IntervalInfo Cur = Inactive.top();
+
+ // Retire any active intervals that end before Cur starts.
+ IntervalSet::iterator RetireItr = Active.begin();
+ while (RetireItr != Active.end() &&
+ (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
+ // If this interval has subsequent segments, add the next one to the
+ // inactive list.
+ if (!isAtLastSegment(*RetireItr))
+ Inactive.push(nextSegment(*RetireItr));
+
+ ++RetireItr;
+ }
+ Active.erase(Active.begin(), RetireItr);
+
+ // One of the newly retired segments may actually start before the
+ // Cur segment, so re-grab the front of the inactive list.
+ Cur = Inactive.top();
+ Inactive.pop();
+
+ // At this point we know that Cur overlaps all active intervals. Add the
+ // interference edges.
+ PBQP::GraphBase::NodeId NId = getNodeId(Cur);
+ for (const auto &A : Active) {
+ PBQP::GraphBase::NodeId MId = getNodeId(A);
+
+ // Check that we haven't already added this edge
+ // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
+ // It might be better to replace this with a local bit-matrix.
+ if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ continue;
- for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
- vr1Itr != vrEnd; ++vr1Itr) {
- unsigned vr1 = *vr1Itr;
- const LiveInterval &l1 = lis->getInterval(vr1);
- const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
-
- for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd;
- ++vr2Itr) {
- unsigned vr2 = *vr2Itr;
- const LiveInterval &l2 = lis->getInterval(vr2);
- const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
-
- assert(!l2.empty() && "Empty interval in vreg set?");
- if (l1.overlaps(l2)) {
- PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0);
- addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri);
-
- g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
- std::move(edgeCosts));
+ // This is a new edge - add it to the graph.
+ createInterferenceEdge(G, NId, MId, C);
}
+
+ // Finally, add Cur to the Active set.
+ Active.insert(Cur);
}
}
- return p.release();
-}
+private:
-void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
- PBQP::PBQPNum spillCost) {
- costVec[0] = spillCost;
-}
+ void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId, IMatrixCache &C) {
-void PBQPBuilder::addInterferenceCosts(
- PBQP::Matrix &costMat,
- const PBQPRAProblem::AllowedSet &vr1Allowed,
- const PBQPRAProblem::AllowedSet &vr2Allowed,
- const TargetRegisterInfo *tri) {
- assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
- assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
- for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
- unsigned preg1 = vr1Allowed[i];
+ const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
+ const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
- for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
- unsigned preg2 = vr2Allowed[j];
+ // Try looking the edge costs up in the IMatrixCache first.
+ IMatrixKey K(&NRegs, &MRegs);
+ IMatrixCache::iterator I = C.find(K);
+ if (I != C.end()) {
+ G.addEdgeBypassingCostAllocator(NId, MId, I->second);
+ return;
+ }
- if (tri->regsOverlap(preg1, preg2)) {
- costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ for (unsigned I = 0; I != NRegs.size(); ++I) {
+ unsigned PRegN = NRegs[I];
+ for (unsigned J = 0; J != MRegs.size(); ++J) {
+ unsigned PRegM = MRegs[J];
+ if (TRI.regsOverlap(PRegN, PRegM))
+ M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
}
}
+
+ PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
+ C[K] = G.getEdgeCostsPtr(EId);
}
-}
+};
-PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
- const LiveIntervals *lis,
- const MachineBlockFrequencyInfo *mbfi,
- const RegSet &vregs) {
- std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
- PBQPRAGraph &g = p->getGraph();
+class Coalescing : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ MachineFunction &MF = G.getMetadata().MF;
+ MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
+ CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the Ok.
+ for (const auto &MBB : MF) {
+ for (const auto &MI : MBB) {
+
+ // Skip not-coalescable or already coalesced copies.
+ if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
+ continue;
- const TargetMachine &tm = mf->getTarget();
- CoalescerPair cp(*tm.getRegisterInfo());
+ unsigned DstReg = CP.getDstReg();
+ unsigned SrcReg = CP.getSrcReg();
- // Scan the machine function and add a coalescing cost whenever CoalescerPair
- // gives the Ok.
- for (const auto &mbb : *mf) {
- for (const auto &mi : mbb) {
- if (!cp.setRegisters(&mi)) {
- continue; // Not coalescable.
- }
+ const float Scale = 1.0f / MBFI.getEntryFreq();
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
- if (cp.getSrcReg() == cp.getDstReg()) {
- continue; // Already coalesced.
- }
+ if (CP.isPhys()) {
+ if (!MF.getRegInfo().isAllocatable(DstReg))
+ continue;
- unsigned dst = cp.getDstReg(),
- src = cp.getSrcReg();
+ PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
- const float copyFactor = 0.5; // Cost of copy relative to load. Current
- // value plucked randomly out of the air.
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
+ G.getNodeMetadata(NId).getAllowedRegs();
- PBQP::PBQPNum cBenefit =
- copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi);
+ unsigned PRegOpt = 0;
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
+ ++PRegOpt;
- if (cp.isPhys()) {
- if (!mf->getRegInfo().isAllocatable(dst)) {
- continue;
- }
-
- const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
- unsigned pregOpt = 0;
- while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
- ++pregOpt;
- }
- if (pregOpt < allowed.size()) {
- ++pregOpt; // +1 to account for spill option.
- PBQPRAGraph::NodeId node = p->getNodeForVReg(src);
- llvm::dbgs() << "Reading node costs for node " << node << "\n";
- llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n";
- PBQP::Vector newCosts(g.getNodeCosts(node));
- addPhysRegCoalesce(newCosts, pregOpt, cBenefit);
- g.setNodeCosts(node, newCosts);
- }
- } else {
- const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
- const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
- PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst);
- PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src);
- PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
- if (edge == g.invalidEdgeId()) {
- PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0);
- addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
- g.addEdge(node1, node2, costs);
+ if (PRegOpt < Allowed.size()) {
+ PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
+ NewCosts[PRegOpt + 1] -= CBenefit;
+ G.setNodeCosts(NId, std::move(NewCosts));
+ }
} else {
- if (g.getEdgeNode1Id(edge) == node2) {
- std::swap(node1, node2);
- std::swap(allowed1, allowed2);
+ PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
+ PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
+ &G.getNodeMetadata(N1Id).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
+ &G.getNodeMetadata(N2Id).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
+ if (EId == G.invalidEdgeId()) {
+ PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
+ Allowed2->size() + 1, 0);
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.addEdge(N1Id, N2Id, std::move(Costs));
+ } else {
+ if (G.getEdgeNode1Id(EId) == N2Id) {
+ std::swap(N1Id, N2Id);
+ std::swap(Allowed1, Allowed2);
+ }
+ PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.setEdgeCosts(EId, std::move(Costs));
}
- PBQP::Matrix costs(g.getEdgeCosts(edge));
- addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
- g.setEdgeCosts(edge, costs);
}
}
}
}
- return p.release();
-}
-
-void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
- unsigned pregOption,
- PBQP::PBQPNum benefit) {
- costVec[pregOption] += -benefit;
-}
-
-void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
- PBQP::Matrix &costMat,
- const PBQPRAProblem::AllowedSet &vr1Allowed,
- const PBQPRAProblem::AllowedSet &vr2Allowed,
- PBQP::PBQPNum benefit) {
-
- assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
- assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
-
- for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
- unsigned preg1 = vr1Allowed[i];
- for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
- unsigned preg2 = vr2Allowed[j];
+private:
- if (preg1 == preg2) {
- costMat[i + 1][j + 1] += -benefit;
+ void addVirtRegCoalesce(
+ PBQPRAGraph::RawMatrix &CostMat,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
+ PBQP::PBQPNum Benefit) {
+ assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
+ assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
+ for (unsigned I = 0; I != Allowed1.size(); ++I) {
+ unsigned PReg1 = Allowed1[I];
+ for (unsigned J = 0; J != Allowed2.size(); ++J) {
+ unsigned PReg2 = Allowed2[J];
+ if (PReg1 == PReg2)
+ CostMat[I + 1][J + 1] -= Benefit;
}
}
}
-}
+};
+
+} // End anonymous namespace.
+
+// Out-of-line destructor/anchor for PBQPRAConstraint.
+PBQPRAConstraint::~PBQPRAConstraint() {}
+void PBQPRAConstraint::anchor() {}
+void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesCFG();
@@ -437,118 +457,197 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
MachineFunctionPass::getAnalysisUsage(au);
}
-void RegAllocPBQP::findVRegIntervalsToAlloc() {
+void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
+ LiveIntervals &LIS) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
// Iterate over all live ranges.
- for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (mri->reg_nodbg_empty(Reg))
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
continue;
- LiveInterval *li = &lis->getInterval(Reg);
+ LiveInterval &LI = LIS.getInterval(Reg);
// If this live interval is non-empty we will use pbqp to allocate it.
// Empty intervals we allocate in a simple post-processing stage in
// finalizeAlloc.
- if (!li->empty()) {
- vregsToAlloc.insert(li->reg);
+ if (!LI.empty()) {
+ VRegsToAlloc.insert(LI.reg);
} else {
- emptyIntervalVRegs.insert(li->reg);
+ EmptyIntervalVRegs.insert(LI.reg);
+ }
+ }
+}
+
+static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
+ const MachineFunction &MF) {
+ const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSR[i] != 0; ++i)
+ if (TRI.regsOverlap(reg, CSR[i]))
+ return true;
+ return false;
+}
+
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+ MachineFunction &MF = G.getMetadata().MF;
+
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+
+ for (auto VReg : VRegsToAlloc) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+ LiveInterval &VRegLI = LIS.getInterval(VReg);
+
+ // Record any overlaps with regmask operands.
+ BitVector RegMaskOverlaps;
+ LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ std::vector<unsigned> VRegAllowed;
+ ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
+ for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
+ unsigned PReg = RawPRegOrder[I];
+ if (MRI.isReserved(PReg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
+ if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ VRegAllowed.push_back(PReg);
}
+
+ PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
+
+ // Tweak cost of callee saved registers, as using then force spilling and
+ // restoring them. This would only happen in the prologue / epilogue though.
+ for (unsigned i = 0; i != VRegAllowed.size(); ++i)
+ if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
+ NodeCosts[1 + i] += 1.0;
+
+ PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
+ G.getNodeMetadata(NId).setVReg(VReg);
+ G.getNodeMetadata(NId).setAllowedRegs(
+ G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
+ G.getMetadata().setNodeIdForVReg(VReg, NId);
}
}
-bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
- const PBQP::Solution &solution) {
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const TargetRegisterInfo &TRI =
+ *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ (void)TRI;
+
// Set to true if we have any spills
- bool anotherRoundNeeded = false;
+ bool AnotherRoundNeeded = false;
// Clear the existing allocation.
- vrm->clearAllVirt();
+ VRM.clearAllVirt();
- const PBQPRAGraph &g = problem.getGraph();
// Iterate over the nodes mapping the PBQP solution to a register
// assignment.
- for (auto NId : g.nodeIds()) {
- unsigned vreg = problem.getVRegForNode(NId);
- unsigned alloc = solution.getSelection(NId);
-
- if (problem.isPRegOption(vreg, alloc)) {
- unsigned preg = problem.getPRegForOption(vreg, alloc);
- DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
- << tri->getName(preg) << "\n");
- assert(preg != 0 && "Invalid preg selected.");
- vrm->assignVirt2Phys(vreg, preg);
- } else if (problem.isSpillOption(vreg, alloc)) {
- vregsToAlloc.erase(vreg);
- SmallVector<unsigned, 8> newSpills;
- LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
- spiller->spill(LRE);
-
- DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOption = Solution.getSelection(NId);
+
+ if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
+ unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+ << TRI.getName(PReg) << "\n");
+ assert(PReg != 0 && "Invalid preg selected.");
+ VRM.assignVirt2Phys(VReg, PReg);
+ } else {
+ VRegsToAlloc.erase(VReg);
+ SmallVector<unsigned, 8> NewSpills;
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
<< LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
- itr != end; ++itr) {
- LiveInterval &li = lis->getInterval(*itr);
- assert(!li.empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg(li.reg, tri) << " ");
- vregsToAlloc.insert(li.reg);
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
+ I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
+ assert(!LI.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg);
}
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
- anotherRoundNeeded |= !LRE.empty();
- } else {
- llvm_unreachable("Unknown allocation option.");
+ AnotherRoundNeeded |= !LRE.empty();
}
}
- return !anotherRoundNeeded;
+ return !AnotherRoundNeeded;
}
+void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
-void RegAllocPBQP::finalizeAlloc() const {
// First allocate registers for the empty intervals.
for (RegSet::const_iterator
- itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
- itr != end; ++itr) {
- LiveInterval *li = &lis->getInterval(*itr);
+ I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
+ I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
- unsigned physReg = mri->getSimpleHint(li->reg);
+ unsigned PReg = MRI.getSimpleHint(LI.reg);
- if (physReg == 0) {
- const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
- physReg = liRC->getRawAllocationOrder(*mf).front();
+ if (PReg == 0) {
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
+ PReg = RC.getRawAllocationOrder(MF).front();
}
- vrm->assignVirt2Phys(li->reg, physReg);
+ VRM.assignVirt2Phys(LI.reg, PReg);
}
}
-bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
+ unsigned NumInstr) {
+ // All intervals have a spill weight that is mostly proportional to the number
+ // of uses, with uses in loops having a bigger weight.
+ return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
+}
- mf = &MF;
- tm = &mf->getTarget();
- tri = tm->getRegisterInfo();
- tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<MachineBlockFrequencyInfo>();
- lis = &getAnalysis<LiveIntervals>();
- lss = &getAnalysis<LiveStacks>();
- mbfi = &getAnalysis<MachineBlockFrequencyInfo>();
+ calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI,
+ normalizePBQPSpillWeight);
- calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(),
- *mbfi);
+ VirtRegMap &VRM = getAnalysis<VirtRegMap>();
- vrm = &getAnalysis<VirtRegMap>();
- spiller.reset(createInlineSpiller(*this, MF, *vrm));
+ std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
- mri->freezeReservedRegs(MF);
+ MF.getRegInfo().freezeReservedRegs(MF);
- DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
+ DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
// Allocator main loop:
//
@@ -560,72 +659,72 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// This process is continued till no more spills are generated.
// Find the vreg intervals in need of allocation.
- findVRegIntervalsToAlloc();
+ findVRegIntervalsToAlloc(MF, LIS);
#ifndef NDEBUG
- const Function* func = mf->getFunction();
- std::string fqn =
- func->getParent()->getModuleIdentifier() + "." +
- func->getName().str();
+ const Function &F = *MF.getFunction();
+ std::string FullyQualifiedName =
+ F.getParent()->getModuleIdentifier() + "." + F.getName().str();
#endif
// If there are non-empty intervals allocate them using pbqp.
- if (!vregsToAlloc.empty()) {
+ if (!VRegsToAlloc.empty()) {
- bool pbqpAllocComplete = false;
- unsigned round = 0;
+ const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl();
+ std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
+ llvm::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
+ if (PBQPCoalescing)
+ ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
- while (!pbqpAllocComplete) {
- DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+ bool PBQPAllocComplete = false;
+ unsigned Round = 0;
- std::unique_ptr<PBQPRAProblem> problem(
- builder->build(mf, lis, mbfi, vregsToAlloc));
+ while (!PBQPAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+
+ PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
+ initializeGraph(G);
+ ConstraintsRoot->apply(G);
#ifndef NDEBUG
- if (pbqpDumpGraphs) {
- std::ostringstream rs;
- rs << round;
- std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
- std::string tmp;
- raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text);
- DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
- << graphFileName << "\"\n");
- problem->getGraph().dump(os);
+ if (PBQPDumpGraphs) {
+ std::ostringstream RS;
+ RS << Round;
+ std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
+ ".pbqpgraph";
+ std::error_code EC;
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
+ DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
+ << GraphFileName << "\"\n");
+ G.dumpToStream(OS);
}
#endif
- PBQP::Solution solution =
- PBQP::RegAlloc::solve(problem->getGraph());
-
- pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
-
- ++round;
+ PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
+ PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
+ ++Round;
}
}
// Finalise allocation, allocate empty ranges.
- finalizeAlloc();
- vregsToAlloc.clear();
- emptyIntervalVRegs.clear();
+ finalizeAlloc(MF, LIS, VRM);
+ VRegsToAlloc.clear();
+ EmptyIntervalVRegs.clear();
- DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
return true;
}
-FunctionPass *
-llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> &builder,
- char *customPassID) {
- return new RegAllocPBQP(builder, customPassID);
+FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
+ return new RegAllocPBQP(customPassID);
}
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
- std::unique_ptr<PBQPBuilder> Builder;
- if (pbqpCoalescing)
- Builder.reset(new PBQPBuilderWithCoalescing());
- else
- Builder.reset(new PBQPBuilder());
- return createPBQPRegisterAllocator(Builder);
+ return createPBQPRegisterAllocator();
}
#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 8b5445c..e0d1aa2 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -38,8 +37,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
MF = &mf;
// Allocate new array the first time we see a new target.
- if (MF->getTarget().getRegisterInfo() != TRI) {
- TRI = MF->getTarget().getRegisterInfo();
+ if (MF->getSubtarget().getRegisterInfo() != TRI) {
+ TRI = MF->getSubtarget().getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
unsigned NumPSets = TRI->getNumRegPressureSets();
PSetLimits.reset(new unsigned[NumPSets]);
@@ -138,7 +137,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.LastCostChange = LastCostChange;
DEBUG({
- dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 65b0528..2d2dc92 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -94,11 +94,11 @@ namespace {
/// blocks exclusively containing copies.
bool JoinSplitEdges;
- /// WorkList - Copy instructions yet to be coalesced.
+ /// Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
SmallVector<MachineInstr*, 8> LocalWorkList;
- /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// Set of instruction pointers that have been erased, and
/// that may be present in WorkList.
SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
@@ -114,21 +114,21 @@ namespace {
/// LiveRangeEdit callback.
void LRE_WillEraseInstruction(MachineInstr *MI) override;
- /// coalesceLocals - coalesce the LocalWorkList.
+ /// Coalesce the LocalWorkList.
void coalesceLocals();
- /// joinAllIntervals - join compatible live intervals
+ /// Join compatible live intervals
void joinAllIntervals();
- /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// Coalesce copies in the specified MBB, putting
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// Try to coalesce all copies in CurrList. Return
/// true if any progress was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
- /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
/// true if the copy was successfully coalesced away. If it is not
/// currently possible to coalesce this interval, but it may be possible if
@@ -136,7 +136,7 @@ namespace {
/// 'Again'.
bool joinCopy(MachineInstr *TheCopy, bool &Again);
- /// joinIntervals - Attempt to join these two intervals. On failure, this
+ /// Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we
/// can use this information below to update aliases.
bool joinIntervals(CoalescerPair &CP);
@@ -147,39 +147,39 @@ namespace {
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
- /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
- /// hasOtherReachingDefs - Return true if there are definitions of IntB
+ /// Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
VNInfo *AValNo, VNInfo *BValNo);
- /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// We found a non-trivially-coalescable copy.
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
- /// reMaterializeTrivialDef - If the source of a copy is defined by a
+ /// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
bool &IsDefCopy);
- /// canJoinPhys - Return true if a physreg copy should be joined.
+ /// Return true if a physreg copy should be joined.
bool canJoinPhys(const CoalescerPair &CP);
- /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
- /// eliminateUndefCopy - Handle copies of undef values.
+ /// Handle copies of undef values.
bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
public:
@@ -192,10 +192,10 @@ namespace {
void releaseMemory() override;
- /// runOnMachineFunction - pass entry point
+ /// This is the pass entry point.
bool runOnMachineFunction(MachineFunction&) override;
- /// print - Implement the dump method.
+ /// Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
} /// end anonymous namespace
@@ -407,7 +407,7 @@ void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
ErasedInstrs.insert(MI);
}
-/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a copy from B,
/// see if we can merge these two pieces of B into a single value number,
@@ -512,7 +512,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
return true;
}
-/// hasOtherReachingDefs - Return true if there are definitions of IntB
+/// Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
@@ -542,7 +542,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
/// commutable instruction and its other operand is coalesced to the copy dest
@@ -725,7 +725,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
-/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI,
@@ -904,7 +904,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
return true;
}
-/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef>
+/// ProcessImpicitDefs may leave some copies of <undef>
/// values, it only removes local variables. When we have a copy like:
///
/// %vreg1 = COPY %vreg2<undef>
@@ -944,11 +944,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
return true;
}
-/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
-/// update the subregister number if it is not zero. If DstReg is a
-/// physical register and the existing subregister number of the def / use
-/// being updated is not zero, make sure to set it to the correct physical
-/// subregister.
+/// Replace all defs and uses of SrcReg to DstReg and update the subregister
+/// number if it is not zero. If DstReg is a physical register and the existing
+/// subregister number of the def / use being updated is not zero, make sure to
+/// set it to the correct physical subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
@@ -966,7 +965,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// the UseMI operands removes them from the SrcReg use-def chain, but when
// SrcReg is DstReg we could encounter UseMI twice if it has multiple
// operands mentioning the virtual register.
- if (SrcReg == DstReg && !Visited.insert(UseMI))
+ if (SrcReg == DstReg && !Visited.insert(UseMI).second)
continue;
SmallVector<unsigned,8> Ops;
@@ -1003,7 +1002,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
}
-/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+/// Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
@@ -1021,7 +1020,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
return false;
}
-/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
@@ -1037,6 +1036,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return false;
}
+ if (CP.getNewRC()) {
+ auto SrcRC = MRI->getRegClass(CP.getSrcReg());
+ auto DstRC = MRI->getRegClass(CP.getDstReg());
+ unsigned SrcIdx = CP.getSrcIdx();
+ unsigned DstIdx = CP.getDstIdx();
+ if (CP.isFlipped()) {
+ std::swap(SrcIdx, DstIdx);
+ std::swap(SrcRC, DstRC);
+ }
+ if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
+ return false;
+ }
+ }
+
// Dead code elimination. This really should be handled by MachineDCE, but
// sometimes dead copies slip through, and we can't generate invalid live
// ranges.
@@ -1090,9 +1105,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return false;
}
} else {
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+ LIS->getInterval(CP.getDstReg()).size())
+ CP.flip();
+
DEBUG({
- dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
- << " with ";
+ dbgs() << "\tConsidering merging to "
+ << TRI->getRegClassName(CP.getNewRC()) << " with ";
if (CP.getDstIdx() && CP.getSrcIdx())
dbgs() << PrintReg(CP.getDstReg()) << " in "
<< TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
@@ -1102,11 +1122,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
<< PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
});
-
- // When possible, let DstReg be the larger interval.
- if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
- LIS->getInterval(CP.getDstReg()).size())
- CP.flip();
}
// Okay, attempt to join these two intervals. On failure, this returns false.
@@ -1171,7 +1186,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- dbgs() << "\tJoined. Result = ";
+ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+ << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+ dbgs() << "\tResult = ";
if (CP.isPhys())
dbgs() << PrintReg(CP.getDstReg(), TRI);
else
@@ -1423,7 +1440,7 @@ public:
/// Add erased instructions to ErasedInstrs.
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
/// the erased instrs.
- void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs);
/// Get the value assignments suitable for passing to LiveInterval::join.
@@ -1936,7 +1953,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
@@ -2035,8 +2052,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
return true;
}
-/// joinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
+/// Attempt to join these two intervals. On failure, this returns false.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
@@ -2208,8 +2224,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &fn.getRegInfo();
TM = &fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = TM->getSubtargetImpl()->getRegisterInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
@@ -2250,7 +2266,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
continue;
if (MRI->recomputeRegClass(Reg, *TM)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
- << MRI->getRegClass(Reg)->getName() << '\n');
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
++NumInflated;
}
}
@@ -2261,7 +2277,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// print - Implement the dump method.
+/// Implement the dump method.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
LIS->print(O, m);
}
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index e57ceab..04067a1 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
-#define LLVM_CODEGEN_REGISTER_COALESCER_H
+#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
namespace llvm {
@@ -22,38 +22,36 @@ namespace llvm {
class TargetRegisterClass;
class TargetInstrInfo;
- /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// A helper class for register coalescers. When deciding if
/// two registers can be coalesced, CoalescerPair can determine if a copy
/// instruction would become an identity copy after coalescing.
class CoalescerPair {
const TargetRegisterInfo &TRI;
- /// DstReg - The register that will be left after coalescing. It can be a
+ /// The register that will be left after coalescing. It can be a
/// virtual or physical register.
unsigned DstReg;
- /// SrcReg - the virtual register that will be coalesced into dstReg.
+ /// The virtual register that will be coalesced into dstReg.
unsigned SrcReg;
- /// DstIdx - The sub-register index of the old DstReg in the new coalesced
- /// register.
+ /// The sub-register index of the old DstReg in the new coalesced register.
unsigned DstIdx;
- /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
- /// register.
+ /// The sub-register index of the old SrcReg in the new coalesced register.
unsigned SrcIdx;
- /// Partial - True when the original copy was a partial subregister copy.
+ /// True when the original copy was a partial subregister copy.
bool Partial;
- /// CrossClass - True when both regs are virtual, and newRC is constrained.
+ /// True when both regs are virtual and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
- /// NewRC - The register class of the coalesced register, or NULL if DstReg
+ /// The register class of the coalesced register, or NULL if DstReg
/// is a physreg. This register class may be a super-register of both
/// SrcReg and DstReg.
const TargetRegisterClass *NewRC;
@@ -70,49 +68,47 @@ namespace llvm {
: TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
- /// setRegisters - set registers to match the copy instruction MI. Return
+ /// Set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
bool setRegisters(const MachineInstr*);
- /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// Swap SrcReg and DstReg. Return false if swapping is impossible
/// because DstReg is a physical register, or SubIdx is set.
bool flip();
- /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// Return true if MI is a copy instruction that will become
/// an identity copy after coalescing.
bool isCoalescable(const MachineInstr*) const;
- /// isPhys - Return true if DstReg is a physical register.
+ /// Return true if DstReg is a physical register.
bool isPhys() const { return !NewRC; }
- /// isPartial - Return true if the original copy instruction did not copy
+ /// Return true if the original copy instruction did not copy
/// the full register, but was a subreg operation.
bool isPartial() const { return Partial; }
- /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// Return true if DstReg is virtual and NewRC is a smaller
/// register class than DstReg's.
bool isCrossClass() const { return CrossClass; }
- /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// Return true when getSrcReg is the register being defined by
/// the original copy instruction.
bool isFlipped() const { return Flipped; }
- /// getDstReg - Return the register (virtual or physical) that will remain
+ /// Return the register (virtual or physical) that will remain
/// after coalescing.
unsigned getDstReg() const { return DstReg; }
- /// getSrcReg - Return the virtual register that will be coalesced away.
+ /// Return the virtual register that will be coalesced away.
unsigned getSrcReg() const { return SrcReg; }
- /// getDstIdx - Return the subregister index that DstReg will be coalesced
- /// into, or 0.
+ /// Return the subregister index that DstReg will be coalesced into, or 0.
unsigned getDstIdx() const { return DstIdx; }
- /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
- /// into, or 0.
+ /// Return the subregister index that SrcReg will be coalesced into, or 0.
unsigned getSrcIdx() const { return SrcIdx; }
- /// getNewRC - Return the register class of the coalesced register.
+ /// Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
};
} // End llvm namespace
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 617e459..9925efb 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -185,7 +184,7 @@ void RegPressureTracker::init(const MachineFunction *mf,
reset();
MF = mf;
- TRI = MF->getTarget().getRegisterInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
RCI = rci;
MRI = &MF->getRegInfo();
MBB = mbb;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 72b6285..7626dd2 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -24,24 +24,16 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
-/// setUsed - Set the register and its sub-registers as being used.
-void RegScavenger::setUsed(unsigned Reg) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RegsAvailable.reset(*SubRegs);
-}
-
-bool RegScavenger::isAliasUsed(unsigned Reg) const {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (isUsed(*AI, *AI == Reg))
- return true;
- return false;
+/// setUsed - Set the register units of this register as used.
+void RegScavenger::setRegUsed(unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ RegUnitsAvailable.reset(*RUI);
}
void RegScavenger::initRegState() {
@@ -51,8 +43,8 @@ void RegScavenger::initRegState() {
I->Restore = nullptr;
}
- // All registers started out unused.
- RegsAvailable.set();
+ // All register units start out unused.
+ RegUnitsAvailable.set();
if (!MBB)
return;
@@ -60,22 +52,21 @@ void RegScavenger::initRegState() {
// Live-in registers are in use.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- setUsed(*I);
+ setRegUsed(*I);
// Pristine CSRs are also unavailable.
BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setUsed(I);
+ setRegUsed(I);
}
void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
MachineFunction &MF = *mbb->getParent();
- const TargetMachine &TM = MF.getTarget();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
// It is not possible to use the register scavenger after late optimization
@@ -85,17 +76,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
// Self-initialize.
if (!MBB) {
- NumPhysRegs = TRI->getNumRegs();
- RegsAvailable.resize(NumPhysRegs);
- KillRegs.resize(NumPhysRegs);
- DefRegs.resize(NumPhysRegs);
-
- // Create callee-saved registers bitvector.
- CalleeSavedRegs.resize(NumPhysRegs);
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
- if (CSRegs != nullptr)
- for (unsigned i = 0; CSRegs[i]; ++i)
- CalleeSavedRegs.set(CSRegs[i]);
+ NumRegUnits = TRI->getNumRegUnits();
+ RegUnitsAvailable.resize(NumRegUnits);
+ KillRegUnits.resize(NumRegUnits);
+ DefRegUnits.resize(NumRegUnits);
+ TmpRegUnits.resize(NumRegUnits);
}
MBB = mbb;
@@ -104,10 +89,9 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
Tracking = false;
}
-void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- BV.set(*SubRegs);
+void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ BV.set(*RUI);
}
void RegScavenger::determineKillsAndDefs() {
@@ -122,12 +106,25 @@ void RegScavenger::determineKillsAndDefs() {
// predicated, conservatively assume "kill" markers do not actually kill the
// register. Similarly ignores "dead" markers.
bool isPred = TII->isPredicated(MI);
- KillRegs.reset();
- DefRegs.reset();
+ KillRegUnits.reset();
+ DefRegUnits.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isRegMask())
- (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
+ if (MO.isRegMask()) {
+
+ TmpRegUnits.clear();
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ TmpRegUnits.set(RU);
+ break;
+ }
+ }
+ }
+
+ // Apply the mask.
+ (isPred ? DefRegUnits : KillRegUnits) |= TmpRegUnits;
+ }
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -139,13 +136,13 @@ void RegScavenger::determineKillsAndDefs() {
if (MO.isUndef())
continue;
if (!isPred && MO.isKill())
- addRegWithSubRegs(KillRegs, Reg);
+ addRegUnits(KillRegUnits, Reg);
} else {
assert(MO.isDef());
if (!isPred && MO.isDead())
- addRegWithSubRegs(KillRegs, Reg);
+ addRegUnits(KillRegUnits, Reg);
else
- addRegWithSubRegs(DefRegs, Reg);
+ addRegUnits(DefRegUnits, Reg);
}
}
}
@@ -158,8 +155,8 @@ void RegScavenger::unprocess() {
determineKillsAndDefs();
// Commit the changes.
- setUsed(KillRegs);
- setUnused(DefRegs);
+ setUsed(KillRegUnits);
+ setUnused(DefRegUnits);
}
if (MBBI == MBB->begin()) {
@@ -208,7 +205,7 @@ void RegScavenger::forward() {
if (MO.isUse()) {
if (MO.isUndef())
continue;
- if (!isUsed(Reg)) {
+ if (!isRegUsed(Reg)) {
// Check if it's partial live: e.g.
// D0 = insert_subreg D0<undef>, S0
// ... D0
@@ -219,15 +216,23 @@ void RegScavenger::forward() {
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- if (isUsed(*SubRegs)) {
+ if (isRegUsed(*SubRegs)) {
SubUsed = true;
break;
}
- if (!SubUsed) {
+ bool SuperUsed = false;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (isRegUsed(*SR)) {
+ SuperUsed = true;
+ break;
+ }
+ }
+ if (!SubUsed && !SuperUsed) {
MBB->getParent()->verify(nullptr, "In Register Scavenger");
llvm_unreachable("Using an undefined register!");
}
(void)SubUsed;
+ (void)SuperUsed;
}
} else {
assert(MO.isDef());
@@ -243,23 +248,23 @@ void RegScavenger::forward() {
#endif // NDEBUG
// Commit the changes.
- setUnused(KillRegs);
- setUsed(DefRegs);
+ setUnused(KillRegUnits);
+ setUsed(DefRegUnits);
}
-void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
- used = RegsAvailable;
- used.flip();
- if (includeReserved)
- used |= MRI->getReservedRegs();
- else
- used.reset(MRI->getReservedRegs());
+bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
+ if (includeReserved && isReserved(Reg))
+ return true;
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ if (!RegUnitsAvailable.test(*RUI))
+ return true;
+ return false;
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I)) {
+ if (!isRegUsed(*I)) {
DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
"\n");
return *I;
@@ -273,13 +278,13 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
BitVector Mask(TRI->getNumRegs());
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I))
+ if (!isRegUsed(*I))
Mask.set(*I);
return Mask;
}
/// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after StargMII. UseMI is set to the instruction where the search
+/// longest after StartMII. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
@@ -375,9 +380,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Try to find a register that's unused if there is one, as then we won't
- // have to spill. Search explicitly rather than masking out based on
- // RegsAvailable, as RegsAvailable does not take aliases into account.
- // That's what getRegsAvailable() is for.
+ // have to spill.
BitVector Available = getRegsAvailable(RC);
Available &= Candidates;
if (Available.any())
@@ -388,7 +391,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
- if (!isAliasUsed(SReg)) {
+ if (!isRegUsed(SReg)) {
DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
return SReg;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6a2a080..6f8b337 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -21,6 +21,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -35,11 +36,9 @@ static cl::opt<bool> StressSchedOpt(
void SchedulingPriorityQueue::anchor() { }
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
- : TM(mf.getTarget()),
- TII(TM.getInstrInfo()),
- TRI(TM.getRegisterInfo()),
- MF(mf), MRI(mf.getRegInfo()),
- EntrySU(), ExitSU() {
+ : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()),
+ TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf),
+ MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
#ifndef NDEBUG
StressSched = StressSchedOpt;
#endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0f8b21c..d8d8422 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -50,12 +50,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt,
+ const MachineLoopInfo *mli,
bool IsPostRAFlag,
bool RemoveKillFlags,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
@@ -64,7 +63,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
"Virtual registers must be removed prior to PostRA scheduling");
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -110,7 +109,7 @@ static void getUnderlyingObjects(const Value *V,
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
V = *I;
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
const Value *O =
@@ -512,9 +511,18 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
MachineInstr *MIa,
MachineInstr *MIb) {
+ const MachineFunction *MF = MIa->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
// Cover a trivial case - no edge is need to itself.
if (MIa == MIb)
return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if ((MIa->mayLoad() || MIa->mayStore()) &&
+ (MIb->mayLoad() || MIb->mayStore()))
+ if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+ return false;
// FIXME: Need to handle multiple memory operands to support all targets.
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -563,9 +571,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
AliasAnalysis::AliasResult AAResult = AA->alias(
AliasAnalysis::Location(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getTBAAInfo() : nullptr),
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
AliasAnalysis::Location(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getTBAAInfo() : nullptr));
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != AliasAnalysis::NoAlias);
}
@@ -575,12 +583,12 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
- SmallPtrSet<const SUnit*, 16> &Visited) {
+ SmallPtrSetImpl<const SUnit*> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
// Remember visited nodes.
- if (!Visited.insert(SUb))
+ if (!Visited.insert(SUb).second)
return *Depth;
// If there is _some_ dependency already in place, do not
// descend any further.
@@ -656,7 +664,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but rememeber the rejected node.
- if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index f59c6cf..b2e4617 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <fstream>
using namespace llvm;
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 004c685..38833a4 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -78,7 +78,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
else {
// A nonempty itinerary must have a SchedModel.
- IssueWidth = ItinData->SchedModel->IssueWidth;
+ IssueWidth = ItinData->SchedModel.IssueWidth;
DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
<< ScoreboardDepth << '\n');
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7c42e4d..a1291ed 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17,7 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,7 +34,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -76,6 +77,10 @@ namespace {
"slicing"),
cl::init(false));
+ static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
+
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
@@ -87,56 +92,70 @@ namespace {
bool LegalTypes;
bool ForCodeSize;
- // Worklist of all of the nodes that need to be simplified.
- //
- // This has the semantics that when adding to the worklist,
- // the item added must be next to be processed. It should
- // also only appear once. The naive approach to this takes
- // linear time.
- //
- // To reduce the insert/remove time to logarithmic, we use
- // a set and a vector to maintain our worklist.
- //
- // The set contains the items on the worklist, but does not
- // maintain the order they should be visited.
- //
- // The vector maintains the order nodes should be visited, but may
- // contain duplicate or removed nodes. When choosing a node to
- // visit, we pop off the order stack until we find an item that is
- // also in the contents set. All operations are O(log N).
- SmallPtrSet<SDNode*, 64> WorkListContents;
- SmallVector<SDNode*, 64> WorkListOrder;
+ /// \brief Worklist of all of the nodes that need to be simplified.
+ ///
+ /// This must behave as a stack -- new nodes to process are pushed onto the
+ /// back and when processing we pop off of the back.
+ ///
+ /// The worklist will not contain duplicates but may contain null entries
+ /// due to nodes being deleted from the underlying DAG.
+ SmallVector<SDNode *, 64> Worklist;
+
+ /// \brief Mapping from an SDNode to its position on the worklist.
+ ///
+ /// This is used to find and remove nodes from the worklist (by nulling
+ /// them) when they are deleted from the underlying DAG. It relies on
+ /// stable indices of nodes within the worklist.
+ DenseMap<SDNode *, unsigned> WorklistMap;
+
+ /// \brief Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 64> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(SDNode *N) {
+ /// When an instruction is simplified, add all users of the instruction to
+ /// the work lists because they might get more simplified now.
+ void AddUsersToWorklist(SDNode *N) {
for (SDNode *Node : N->uses())
- AddToWorkList(Node);
+ AddToWorklist(Node);
}
- /// visit - call the node-specific routine that knows how to fold each
- /// particular type of node.
+ /// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure its instance is at the
- /// back (next to be processed.)
- void AddToWorkList(SDNode *N) {
- WorkListContents.insert(N);
- WorkListOrder.push_back(N);
+ /// Add to the worklist making sure its instance is at the back (next to be
+ /// processed.)
+ void AddToWorklist(SDNode *N) {
+ // Skip handle nodes as they can't usefully be combined and confuse the
+ // zero-use deletion strategy.
+ if (N->getOpcode() == ISD::HANDLENODE)
+ return;
+
+ if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ Worklist.push_back(N);
}
- /// removeFromWorkList - remove all instances of N from the worklist.
- ///
- void removeFromWorkList(SDNode *N) {
- WorkListContents.erase(N);
+ /// Remove all instances of N from the worklist.
+ void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+
+ auto It = WorklistMap.find(N);
+ if (It == WorklistMap.end())
+ return; // Not in the worklist.
+
+ // Null out the entry rather than erasing it to avoid a linear operation.
+ Worklist[It->second] = nullptr;
+ WorklistMap.erase(It);
}
+ void deleteAndRecombine(SDNode *N);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
@@ -154,9 +173,9 @@ namespace {
private:
- /// SimplifyDemandedBits - Check the specified integer node value to see if
- /// it can be simplified or if things it uses can be simplified by bit
- /// propagation. If so, return true.
+ /// Check the specified integer node value to see if it can be simplified or
+ /// if things it uses can be simplified by bit propagation.
+ /// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
@@ -167,6 +186,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
/// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
@@ -192,7 +212,7 @@ namespace {
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType);
- /// combine - call the node-specific routine that knows how to fold each
+ /// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
@@ -256,6 +276,7 @@ namespace {
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
+ SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
@@ -269,6 +290,8 @@ namespace {
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
+ SDValue visitFMINNUM(SDNode *N);
+ SDValue visitFMAXNUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -304,7 +327,12 @@ namespace {
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
+ SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildReciprocalEstimate(SDValue Op);
+ SDValue BuildRsqrtEstimate(SDValue Op);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -321,17 +349,16 @@ namespace {
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
- /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
- /// isAlias - Return true if there is any possibility that the two addresses
- /// overlap.
+ /// Return true if there is any possibility that the two addresses overlap.
bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
- /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
- /// looking for a better chain (aliasing node.)
+ /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+ /// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Merge consecutive store operations into a wide store.
@@ -359,13 +386,13 @@ namespace {
FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
- /// Run - runs the dag combiner on all nodes in the work list
+ /// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
- /// getShiftAmountTy - Returns a type large enough to hold any valid
- /// shift amount - before type legalization these can be huge.
+ /// Returns a type large enough to hold any valid shift amount - before type
+ /// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
@@ -374,15 +401,14 @@ namespace {
: TLI.getPointerTy();
}
- /// isTypeLegal - This method returns true if we are running before type
- /// legalization or if the specified VT is legal.
+ /// This method returns true if we are running before type legalization or
+ /// if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
- /// getSetCCResultType - Convenience wrapper around
- /// TargetLowering::getSetCCResultType
+ /// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
@@ -391,16 +417,16 @@ namespace {
namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
-class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
- explicit WorkListRemover(DAGCombiner &dc)
+ explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
- DC.removeFromWorkList(N);
+ DC.removeFromWorklist(N);
}
};
}
@@ -410,11 +436,11 @@ public:
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->AddToWorkList(N);
+ ((DAGCombiner*)DC)->AddToWorklist(N);
}
void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->removeFromWorkList(N);
+ ((DAGCombiner*)DC)->removeFromWorklist(N);
}
SDValue TargetLowering::DAGCombinerInfo::
@@ -442,9 +468,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Helper Functions
//===----------------------------------------------------------------------===//
-/// isNegatibleForFree - Return 1 if we can compute the negated form of the
-/// specified expression for the same cost as the expression itself, or 2 if we
-/// can compute the negated form more cheaply than the expression itself.
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+ removeFromWorklist(N);
+
+ // If the operands of this node are only used by the node, they will now be
+ // dead. Make sure to re-visit them and recursively delete dead nodes.
+ for (const SDValue &Op : N->ops())
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorklist(Op.getNode());
+
+ DAG.DeleteNode(N);
+}
+
+/// Return 1 if we can compute the negated form of the specified expression for
+/// the same cost as the expression itself, or 2 if we can compute the negated
+/// form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
@@ -507,10 +548,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
}
}
-/// GetNegatedExpression - If isNegatibleForFree returns true, this function
-/// returns the newly negated expression.
+/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, unsigned Depth = 0) {
+ const TargetOptions &Options = DAG.getTarget().Options;
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
@@ -527,12 +568,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(DAG.getTarget().Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -544,7 +584,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(DAG.getTarget().Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -557,12 +597,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
+ assert(!Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -587,7 +626,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
-// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
@@ -606,15 +645,19 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
return false;
+ if (TLI.getBooleanContents(N.getValueType()) ==
+ TargetLowering::UndefinedBooleanContent)
+ return false;
+
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
-// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
-// one use. If this is true, it allows the users to invert the operation for
-// free when it is profitable to do so.
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
@@ -622,7 +665,7 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// Returns true if N is a BUILD_VECTOR node whose
/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
@@ -643,7 +686,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if(BV && BV->isConstant())
+ if (BV && BV->isConstant())
return BV;
return nullptr;
}
@@ -669,6 +712,23 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
return nullptr;
}
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// float.
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+ if (CN && UndefElements.none())
+ return CN;
+ }
+
+ return nullptr;
+}
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
@@ -687,7 +747,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
- AddToWorkList(OpNode.getNode());
+ AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
@@ -708,7 +768,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
if (!OpNode.getNode())
return SDValue();
- AddToWorkList(OpNode.getNode());
+ AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
}
}
@@ -730,14 +790,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
- AddToWorkList(To[i].getNode());
- AddUsersToWorkList(To[i].getNode());
+ AddToWorklist(To[i].getNode());
+ AddUsersToWorklist(To[i].getNode());
}
}
}
@@ -745,14 +805,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (N->use_empty()) {
- // Nodes can be reintroduced into the worklist. Make sure we do not
- // process a node that has been replaced.
- removeFromWorkList(N);
-
- // Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
- }
+ if (N->use_empty())
+ deleteAndRecombine(N);
return SDValue(N, 0);
}
@@ -760,32 +814,22 @@ void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
- AddToWorkList(TLO.New.getNode());
- AddUsersToWorkList(TLO.New.getNode());
+ AddToWorklist(TLO.New.getNode());
+ AddUsersToWorklist(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (TLO.Old.getNode()->use_empty()) {
- removeFromWorkList(TLO.Old.getNode());
-
- // If the operands of this node are only used by the node, they will now
- // be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
- if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
- AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
-
- DAG.DeleteNode(TLO.Old.getNode());
- }
+ if (TLO.Old.getNode()->use_empty())
+ deleteAndRecombine(TLO.Old.getNode());
}
-/// SimplifyDemandedBits - Check the specified integer node value to see if
-/// it can be simplified or if things it uses can be simplified by bit
-/// propagation. If so, return true.
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
@@ -793,7 +837,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
return false;
// Revisit the node.
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
// Replace the old value with the new one.
++NodesCombined;
@@ -817,12 +861,11 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
- removeFromWorkList(Load);
- DAG.DeleteNode(Load);
- AddToWorkList(Trunc.getNode());
+ deleteAndRecombine(Load);
+ AddToWorklist(Trunc.getNode());
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
@@ -872,7 +915,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
@@ -887,16 +930,16 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
-/// PromoteIntBinOp - Promote the specified integer binary operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
@@ -934,9 +977,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
return SDValue();
}
- AddToWorkList(NN0.getNode());
+ AddToWorklist(NN0.getNode());
if (NN1.getNode())
- AddToWorkList(NN1.getNode());
+ AddToWorklist(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -952,9 +995,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
return SDValue();
}
-/// PromoteIntShiftOp - Promote the specified integer shift operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
@@ -986,7 +1029,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!N0.getNode())
return SDValue();
- AddToWorkList(N0.getNode());
+ AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
@@ -1066,17 +1109,45 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
- removeFromWorkList(N);
- DAG.DeleteNode(N);
- AddToWorkList(Result.getNode());
+ deleteAndRecombine(N);
+ AddToWorklist(Result.getNode());
return true;
}
return false;
}
+/// \brief Recursively delete a node which has no uses and any operands for
+/// which it is the only use.
+///
+/// Note that this both deletes the nodes and removes them from the worklist.
+/// It also adds any nodes who have had a user deleted to the worklist as they
+/// may now have only one use and subject to other combines.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+ if (!N->use_empty())
+ return false;
+
+ SmallSetVector<SDNode *, 16> Nodes;
+ Nodes.insert(N);
+ do {
+ N = Nodes.pop_back_val();
+ if (!N)
+ continue;
+
+ if (N->use_empty()) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Nodes.insert(N->getOperand(i).getNode());
+
+ removeFromWorklist(N);
+ DAG.DeleteNode(N);
+ } else {
+ AddToWorklist(N);
+ }
+ } while (!Nodes.empty());
+ return true;
+}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
@@ -1088,44 +1159,69 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
+ // Early exit if this basic block is in an optnone function.
+ AttributeSet FnAttrs =
+ DAG.getMachineFunction().getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeNone))
+ return;
+
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- AddToWorkList(I);
+ AddToWorklist(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // The root of the dag may dangle to deleted nodes until the dag combiner is
- // done. Set it to null to avoid confusion.
- DAG.setRoot(SDValue());
-
// while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkListContents.empty()) {
+ while (!WorklistMap.empty()) {
SDNode *N;
- // The WorkListOrder holds the SDNodes in order, but it may contain
- // duplicates.
- // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
- // worklist *should* contain, and check the node we want to visit is should
- // actually be visited.
+ // The Worklist holds the SDNodes in order, but it may contain null entries.
do {
- N = WorkListOrder.pop_back_val();
- } while (!WorkListContents.erase(N));
+ N = Worklist.pop_back_val();
+ } while (!N);
+
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
- if (N->use_empty() && N != &Dummy) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- AddToWorkList(N->getOperand(i).getNode());
-
- DAG.DeleteNode(N);
+ if (recursivelyDeleteUnusedNodes(N))
continue;
+
+ WorklistRemover DeadNodes(*this);
+
+ // If this combine is running after legalizing the DAG, re-legalize any
+ // nodes pulled off the worklist.
+ if (Level == AfterLegalizeDAG) {
+ SmallSetVector<SDNode *, 16> UpdatedNodes;
+ bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+ for (SDNode *LN : UpdatedNodes) {
+ AddToWorklist(LN);
+ AddUsersToWorklist(LN);
+ }
+ if (!NIsValid)
+ continue;
}
+ DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ CombinedNodes.insert(N);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (!CombinedNodes.count(N->getOperand(i).getNode()))
+ AddToWorklist(N->getOperand(i).getNode());
+
SDValue RV = combine(N);
if (!RV.getNode())
@@ -1144,15 +1240,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- RV.getNode()->dump(&DAG);
- dbgs() << '\n');
+ DEBUG(dbgs() << " ... into: ";
+ RV.getNode()->dump(&DAG));
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
- WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
@@ -1163,26 +1255,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
// Push the new node and any users onto the worklist
- AddToWorkList(RV.getNode());
- AddUsersToWorkList(RV.getNode());
-
- // Add any uses of the old node to the worklist in case this node is the
- // last one that uses them. They may become dead after this node is
- // deleted.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- AddToWorkList(N->getOperand(i).getNode());
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
- // something else needing this node.
- if (N->use_empty()) {
- // Nodes can be reintroduced into the worklist. Make sure we do not
- // process a node that has been replaced.
- removeFromWorkList(N);
-
- // Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
- }
+ // something else needing this node. This will also take care of adding any
+ // operands which have lost a user to the worklist.
+ recursivelyDeleteUnusedNodes(N);
}
// If the root changed (e.g. it was a dead load, update the root).
@@ -1244,6 +1324,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
+ case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
@@ -1255,6 +1336,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FMINNUM: return visitFMINNUM(N);
+ case ISD::FMAXNUM: return visitFMAXNUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -1347,8 +1430,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
return RV;
}
-/// getInputChainForNode - Given a node, return its input chain if it has one,
-/// otherwise return a null sd operand.
+/// Given a node, return its input chain if it has one, otherwise return a null
+/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
@@ -1402,7 +1485,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
Changed = true;
break;
}
@@ -1410,7 +1493,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
default:
// Only add if it isn't already in the list.
- if (SeenOps.insert(Op.getNode()))
+ if (SeenOps.insert(Op.getNode()).second)
Ops.push_back(Op);
else
Changed = true;
@@ -1440,44 +1523,21 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
- AddUsersToWorkList(N);
+ AddUsersToWorklist(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
- removeFromWorkList(N);
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static
-SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
- SelectionDAG &DAG) {
- EVT VT = N0.getValueType();
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
-
- if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
- isa<ConstantSDNode>(N00.getOperand(1))) {
- // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
- N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- DAG.getNode(ISD::SHL, SDLoc(N00), VT,
- N00.getOperand(0), N01),
- DAG.getNode(ISD::SHL, SDLoc(N01), VT,
- N00.getOperand(1), N01));
- return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1594,16 +1654,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
- // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
- if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
- SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
- if (Result.getNode()) return Result;
- }
- if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
- SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
- if (Result.getNode()) return Result;
- }
-
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL &&
N1.getOperand(0).getOpcode() == ISD::SUB)
@@ -1647,6 +1697,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
+ // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+ }
+ }
+
return SDValue();
}
@@ -1812,6 +1873,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
VT);
}
+ // sub X, (sextinreg Y i1) -> add X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
+ }
+ }
+
return SDValue();
}
@@ -1933,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1)))) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1, N0.getOperand(1));
- AddToWorkList(C3.getNode());
+ AddToWorklist(C3.getNode());
return DAG.getNode(ISD::MUL, SDLoc(N), VT,
N0.getOperand(0), C3);
}
@@ -2016,9 +2088,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
- if (TLI.isPow2DivCheap())
+ if (TLI.isPow2SDivCheap())
return SDValue();
+ // Target-specific implementation of sdiv x, pow2.
+ SDValue Res = BuildSDIVPow2(N);
+ if (Res.getNode())
+ return Res;
+
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
@@ -2026,7 +2103,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
getShiftAmountTy(N0.getValueType())));
- AddToWorkList(SGN.getNode());
+ AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
@@ -2034,8 +2111,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
DAG.getConstant(VT.getScalarSizeInBits() - lg2,
getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
- AddToWorkList(SRL.getNode());
- AddToWorkList(ADD.getNode()); // Divide by pow2
+ AddToWorklist(SRL.getNode());
+ AddToWorklist(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
@@ -2044,7 +2121,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N1C->getAPIntValue().isNonNegative())
return SRA;
- AddToWorkList(SRA.getNode());
+ AddToWorklist(SRA.getNode());
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
}
@@ -2096,7 +2173,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
ADDVT));
- AddToWorkList(Add.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
}
}
@@ -2138,13 +2215,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorkList(Div.getNode());
+ AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorkList(Mul.getNode());
+ AddToWorklist(Mul.getNode());
return Sub;
}
}
@@ -2181,7 +2258,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
VT));
- AddToWorkList(Add.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
}
}
@@ -2191,13 +2268,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
- AddToWorkList(Div.getNode());
+ AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorkList(Mul.getNode());
+ AddToWorklist(Mul.getNode());
return Sub;
}
}
@@ -2286,10 +2363,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return SDValue();
}
-/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
-/// compute two values. LoOp and HiOp give the opcodes for the two computations
-/// that are being performed. Return true if a simplification was made.
-///
+/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
+/// give the opcodes for the two computations that are being performed. Return
+/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
@@ -2297,8 +2373,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
if (!HiExists &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
- SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
+ SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
@@ -2307,8 +2382,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
if (!LoExists &&
(!LegalOperations ||
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
- SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
+ SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
@@ -2318,9 +2392,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
- SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
- AddToWorkList(Lo.getNode());
+ SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ AddToWorklist(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
@@ -2329,9 +2402,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
}
if (HiExists) {
- SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
- AddToWorkList(Hi.getNode());
+ SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ AddToWorklist(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
@@ -2436,8 +2508,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
return SDValue();
}
-/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
-/// two operands of the same opcode, try to simplify it.
+/// If this is a binary operator with two operands of the same opcode, try to
+/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2470,7 +2542,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
}
@@ -2484,7 +2556,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
ORNode, N0.getOperand(1));
}
@@ -2509,7 +2581,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
return BC;
}
}
@@ -2556,7 +2628,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(0), N1->getOperand(0));
- AddToWorkList(NewNode.getNode());
+ AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
&SVN0->getMask()[0]);
}
@@ -2577,7 +2649,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(1), N1->getOperand(1));
- AddToWorkList(NewNode.getNode());
+ AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
&SVN0->getMask()[0]);
}
@@ -2603,9 +2675,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // do not return N0, because undef node may exist in N0
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
+ // do not return N1, because undef node may exist in N1
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -2768,21 +2848,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ANDNode.getNode());
+ AddToWorklist(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
@@ -2795,7 +2875,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
cast<ConstantSDNode>(RR)->isNullValue()))) {
SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorkList(ADDNode.getNode());
+ AddToWorklist(ADDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ADDNode,
DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
}
@@ -2843,7 +2923,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2863,7 +2943,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2894,7 +2974,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2921,7 +3001,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
Alignment = MinAlign(Alignment, PtrOff);
}
- AddToWorkList(NewPtr.getNode());
+ AddToWorklist(NewPtr.getNode());
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
@@ -2929,8 +3009,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
LN0->getChain(), NewPtr,
LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Alignment, LN0->getTBAAInfo());
- AddToWorkList(N);
+ LN0->isInvariant(), Alignment, LN0->getAAInfo());
+ AddToWorklist(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2976,8 +3056,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue();
}
-/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16
-///
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {
if (!LegalOperations)
@@ -3082,10 +3161,13 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
return Res;
}
-/// isBSwapHWordElement - Return true if the specified node is an element
-/// that makes up a 32-bit packed halfword byteswap. i.e.
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
-static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (!N.getNode()->hasOneUse())
return false;
@@ -3152,8 +3234,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
return true;
}
-/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!LegalOperations)
@@ -3165,7 +3250,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
- SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
// Look for either
// (or (or (and), (and)), (or (and), (and)))
// (or (or (or (and), (and)), (and)), (and))
@@ -3173,6 +3257,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
+ SDNode *Parts[4] = {};
if (N1.getOpcode() == ISD::OR &&
N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
@@ -3246,15 +3331,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
- return N0;
+ // do not return N0, because undef node may exist in N0
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
- return N1;
+ // do not return N1, because undef node may exist in N1
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
+ // Avoid folding a node with illegal type.
+ TLI.isTypeLegal(VT) &&
N0->getOperand(1) == N1->getOperand(1) &&
ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
bool CanFold = true;
@@ -3366,7 +3461,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
(Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
@@ -3375,7 +3470,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
(Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
LR.getValueType(), LL, RL);
- AddToWorkList(ANDNode.getNode());
+ AddToWorklist(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
}
@@ -3438,7 +3533,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
-/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
if (isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -3735,7 +3830,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return RXOR;
// fold !(x cc y) -> (x !cc y)
- if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
isInt);
@@ -3761,7 +3856,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue V = N0.getOperand(0);
V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
DAG.getConstant(1, V.getValueType()));
- AddToWorkList(V.getNode());
+ AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
@@ -3773,7 +3868,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
@@ -3785,7 +3880,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
@@ -3794,7 +3889,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N0->getOperand(1) == N1) {
SDValue X = N0->getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
- AddToWorkList(NotX.getNode());
+ AddToWorklist(NotX.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
@@ -3828,8 +3923,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return SDValue();
}
-/// visitShiftByConstant - Handle transforms common to the three shifts, when
-/// the shift amount is a constant.
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// We can't and shouldn't fold opaque constants.
if (Amt->isOpaque())
@@ -4059,7 +4154,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT CountVT = NewOp0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
NewOp0, DAG.getConstant(c2, CountVT));
- AddToWorkList(NewSHL.getNode());
+ AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
@@ -4101,6 +4196,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
HiBitsMask);
}
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Variant of version done on multiply, except mul by a power of 2 is turned
+ // into a shift.
+ APInt Val;
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ (isa<ConstantSDNode>(N0.getOperand(1)) ||
+ isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+ }
+
if (N1C) {
SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
@@ -4345,7 +4452,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
- AddToWorkList(SmallShift.getNode());
+ AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
@@ -4387,7 +4494,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (ShAmt) {
Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
}
return DAG.getNode(ISD::XOR, SDLoc(N), VT,
@@ -4439,12 +4546,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
- AddToWorkList(Use);
+ AddToWorklist(Use);
else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
// Also look pass the truncate.
Use = *Use->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
- AddToWorkList(Use);
+ AddToWorklist(Use);
}
}
@@ -4545,7 +4652,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
N0, DAG.getConstant(1, VT0));
XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
N0, DAG.getConstant(1, VT0));
- AddToWorkList(XORNode.getNode());
+ AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
@@ -4553,13 +4660,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorkList(NOTNode.getNode());
+ AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorkList(NOTNode.getNode());
+ AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
@@ -4582,7 +4689,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC) {
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -4616,12 +4723,17 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
- MVT VT = N->getSimpleValueType(0);
+ EVT VT = N->getValueType(0);
int NumElems = VT.getVectorNumElements();
assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
RHS.getOpcode() == ISD::CONCAT_VECTORS &&
Cond.getOpcode() == ISD::BUILD_VECTOR);
+ // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
+ // binary ones here.
+ if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+ return SDValue();
+
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Skip BV elements until we find one that's not an UNDEF
@@ -4690,8 +4802,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
}
@@ -4718,8 +4830,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// Add the new VSELECT nodes to the work list in case they need to be split
// again.
- AddToWorkList(Lo.getNode());
- AddToWorkList(Hi.getNode());
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
@@ -4761,7 +4873,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, SDLoc(N), false);
if (SCC.getNode()) {
- AddToWorkList(SCC.getNode());
+ AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
if (!SCCC->isNullValue())
@@ -4958,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5134,14 +5246,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC = DAG.getSetCC(DL,
- SetCCVT,
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
- EVT SelectVT = getSetCCResultType(VT);
- return DAG.getSelect(DL, VT,
- DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
+ return DAG.getSelect(DL, VT, SetCC,
NegOne, DAG.getConstant(0, VT));
-
}
}
}
@@ -5239,7 +5347,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5257,7 +5365,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5265,10 +5373,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, SDLoc(N),
N0.getValueType().getScalarType());
@@ -5487,7 +5595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5528,8 +5636,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5614,9 +5721,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
-/// GetDemandedBits - See if the specified operand can be simplified with the
-/// knowledge that only the bits specified by Mask are used. If so, return the
-/// simpler operand, otherwise return a null SDValue.
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used. If so, return the simpler operand,
+/// otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -5657,11 +5764,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return SDValue();
}
-/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
-/// bits and then truncated to a narrower type and where N is a multiple
-/// of number of bits of the narrower type, transform it to a narrower load
-/// from address + N / num of bits of new type. If the result is to be
-/// extended, also fold the extension to form a extending load.
+/// If the result of a wider load is shifted to right of N bits and then
+/// truncated to a narrower type and where N is a multiple of number of bits of
+/// the narrower type, transform it to a narrower load from address + N / num of
+/// bits of new type. If the result is to be extended, also fold the extension
+/// to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
@@ -5786,22 +5893,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
+ AddToWorklist(NewPtr.getNode());
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
+ LN0->isInvariant(), NewAlign, LN0->getAAInfo());
else
Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign, LN0->getTBAAInfo());
+ LN0->isInvariant(), NewAlign, LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
@@ -5900,7 +6007,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- AddToWorkList(ExtLoad.getNode());
+ AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -6022,6 +6129,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT) {
+ EVT SrcVT = N0.getValueType();
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -6121,7 +6241,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
continue;
}
SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
- AddToWorkList(NV.getNode());
+ AddToWorklist(NV.getNode());
Opnds.push_back(NV);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
@@ -6143,7 +6263,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
return Elt.getOperand(Elt.getResNo()).getNode();
}
-/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
@@ -6209,7 +6329,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// Ideally this won't happen very often, because instcombine
// and the earlier dagcombine runs (where illegal nodes are
// permitted) should have folded most of them already.
- DAG.DeleteNode(Res.getNode());
+ deleteAndRecombine(Res.getNode());
}
}
@@ -6238,12 +6358,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), OrigAlign,
- LN0->getTBAAInfo());
- AddToWorkList(N);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::BITCAST, SDLoc(N0),
- N0.getValueType(), Load),
- Load.getValue(1));
+ LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
}
@@ -6257,7 +6373,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
N0.getOperand(0));
- AddToWorkList(NewConv.getNode());
+ AddToWorklist(NewConv.getNode());
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
@@ -6280,34 +6396,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
IntXVT, N0.getOperand(1));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
unsigned VTWidth = VT.getSizeInBits();
if (OrigXWidth < VTWidth) {
X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
X = DAG.getNode(ISD::SRL, SDLoc(X),
X.getValueType(), X,
DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
}
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, VT));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, VT));
- AddToWorkList(Cst.getNode());
+ AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
@@ -6328,9 +6444,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
-/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
-/// destination element value type.
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
@@ -6363,7 +6478,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
DstEltVT, Op));
- AddToWorkList(Ops.back().getNode());
+ AddToWorklist(Ops.back().getNode());
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
@@ -6466,6 +6581,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6476,193 +6592,143 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
- // fold (fadd A, 0) -> A
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
- N1CFP->getValueAPF().isZero())
- return N0;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
- // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
- N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, SDLoc(N), VT,
- N0.getOperand(1), N1));
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // No FP constant should be created after legalization as Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
- // No FP constant should be created after legalization as Instruction
- // Selection pass has hard time in dealing with FP constant.
- //
- // We don't need test this condition for transformation like following, as
- // the DAG being transformed implies it is legal to take FP constant as
- // operand.
- //
- // (fadd (fmul c, x), x) -> (fmul c+1, x)
- //
- bool AllowNewFpConst = (Level < AfterLegalizeDAG);
-
- // If allow, fold (fadd (fneg x), x) -> 0.0
- if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
-
- // If allow, fold (fadd x, (fneg x)) -> 0.0
- if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
- N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
-
- // In unsafe math mode, we can fold chains of FADD's of the same value
- // into multiplications. This transform is not safe in general because
- // we are reducing the number of rounding steps.
- if (DAG.getTarget().Options.UnsafeFPMath &&
- TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
- !N0CFP && !N1CFP) {
- if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-
- // (fadd (fmul c, x), x) -> (fmul x, c+1)
- if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP00, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, NewCFP);
- }
+ // fold (fadd A, 0) -> A
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
- // (fadd (fmul x, c), x) -> (fmul x, c+1)
- if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, NewCFP);
- }
+ // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ N0.getOperand(1), N1));
+
+ // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ return DAG.getConstantFP(0.0, VT);
+
+ // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ return DAG.getConstantFP(0.0, VT);
+
+ // We can fold chains of FADD's of the same value into multiplications.
+ // This transform is not safe in general because we are reducing the number
+ // of rounding steps.
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul x, c), x) -> (fmul x, c+1)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ }
- // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
- if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(1) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP00, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(1), NewCFP);
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N0.getOperand(0), NewCFP);
+ }
}
- // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
- if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), NewCFP);
- }
- }
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
- if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ }
- // (fadd x, (fmul c, x)) -> (fmul x, c+1)
- if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP10, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, NewCFP);
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N0.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ }
}
- // (fadd x, (fmul x, c)) -> (fmul x, c+1)
- if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, NewCFP);
+ if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N1, DAG.getConstantFP(3.0, VT));
}
-
- // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
- if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
- N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(1) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP10, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1.getOperand(1), NewCFP);
+ if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0)
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N0, DAG.getConstantFP(3.0, VT));
}
- // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
- if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+ if (AllowNewConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1.getOperand(0), NewCFP);
- }
- }
-
- if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- // (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
- }
-
- if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
- if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N0.getOperand(0), DAG.getConstantFP(4.0, VT));
}
-
- // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
- if (AllowNewFpConst &&
- N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
- N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0),
- DAG.getConstantFP(4.0, VT));
- }
+ } // enable-unsafe-fp-math
// FADD -> FMA combines:
- if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
- DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1), N1);
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+ if (N1.getOpcode() == ISD::FMUL &&
+ (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
@@ -6673,10 +6739,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6687,60 +6754,60 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
- // fold (fsub A, 0) -> A
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N1CFP->getValueAPF().isZero())
- return N0;
- // fold (fsub 0, B) -> -B
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return GetNegatedExpression(N1, DAG, LegalOperations);
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, dl, VT, N1);
- }
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
- // If 'unsafe math' is enabled, fold
- // (fsub x, x) -> 0.0 &
- // (fsub x, (fadd x, y)) -> (fneg y) &
- // (fsub x, (fadd y, x)) -> (fneg y)
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+
+ // (fsub 0, B) -> -B
+ if (N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ }
+
+ // (fsub x, x) -> 0.0
if (N0 == N1)
return DAG.getConstantFP(0.0f, VT);
+ // (fsub x, (fadd x, y)) -> (fneg y)
+ // (fsub x, (fadd y, x)) -> (fneg y)
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
- if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
- &DAG.getTarget().Options))
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
return GetNegatedExpression(N11, DAG, LegalOperations);
- if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
- &DAG.getTarget().Options))
+ if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
return GetNegatedExpression(N10, DAG, LegalOperations);
}
}
// FSUB -> FMA combines:
- if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
- DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, dl, VT, N1));
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+ if (N1.getOpcode() == ISD::FMUL &&
+ (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
@@ -6749,7 +6816,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG &&
N0.getOperand(0).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
+ TLI.enableAggressiveFMAFusion(VT))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(ISD::FMA, dl, VT,
@@ -6764,47 +6832,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
+ // This just handles C1 * C2 for vectors. Other vector folds are below.
SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (FoldedVOp.getNode())
+ return FoldedVOp;
+ // Canonicalize vector constant to RHS.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N1.getOpcode() != ISD::BUILD_VECTOR)
+ if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
+ if (BV0->isConstant())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
- // fold (fmul A, 0) -> 0
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N1CFP->getValueAPF().isZero())
- return N1;
- // fold (fmul A, 0) -> 0, vector edition.
- if (DAG.getTarget().Options.UnsafeFPMath &&
- ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
+
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+
+ if (Options.UnsafeFPMath) {
+ // fold (fmul A, 0) -> 0
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+
+ // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (N0.getOpcode() == ISD::FMUL) {
+ // Fold scalars or any vector constants (not just splats).
+ // This fold is done in general by InstCombine, but extra fmul insts
+ // may have been generated during lowering.
+ SDValue N01 = N0.getOperand(1);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDLoc SL(N);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+ }
+ }
+
+ // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
+ // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
+ // during an early run of DAGCombiner can prevent folding with fmuls
+ // inserted during lowering.
+ if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+ SDLoc SL(N);
+ const SDValue Two = DAG.getConstantFP(2.0, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ }
+ }
+
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -6814,14 +6917,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
- // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N0.getOpcode() == ISD::FMUL &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(1), N1));
-
return SDValue();
}
@@ -6833,8 +6928,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ // Constant fold FMA.
+ if (isa<ConstantFPSDNode>(N0) &&
+ isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2)) {
+ return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ }
+
+ if (Options.UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -6850,7 +6953,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FMUL &&
N0 == N2.getOperand(0) &&
N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
@@ -6860,7 +6963,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FMUL && N1CFP &&
N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMA, dl, VT,
@@ -6878,19 +6981,19 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
- AddToWorkList(RHSNeg.getNode());
+ AddToWorklist(RHSNeg.getNode());
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
// (fma x, c, x) -> (fmul x, (c+1))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
+ if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
N1, DAG.getConstantFP(1.0, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
@@ -6906,7 +7009,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6918,30 +7022,79 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
- // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
- // Compute the reciprocal 1.0 / c2.
- APFloat N1APF = N1CFP->getValueAPF();
- APFloat Recip(N1APF.getSemantics(), 1); // 1.0
- APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
- // Only do the transform if the reciprocal is a legal fp immediate that
- // isn't too nasty (eg NaN, denormal, ...).
- if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
- (!LegalOperations ||
- // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
- // backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ if (Options.UnsafeFPMath) {
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
+
+ // If this FDIV is part of a reciprocal square root, it may be folded
+ // into a target-specific square root estimate instruction.
+ if (N1.getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FP_ROUND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue SqrtOp;
+ SDValue OtherOp;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(0);
+ OtherOp = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(1);
+ OtherOp = N1.getOperand(0);
+ }
+ if (SqrtOp.getNode()) {
+ // We found a FSQRT, so try to make this fold:
+ // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ }
+ }
+
+ // Fold into a reciprocal estimate and multiply instead of a real divide.
+ if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -6968,6 +7121,31 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
+ if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
+ EVT VT = RV.getValueType();
+ RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
+
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue ZeroCmp =
+ DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+ SDLoc(N), VT, ZeroCmp, Zero, RV);
+ return RV;
+ }
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -7162,7 +7340,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
- AddToWorkList(Tmp.getNode());
+ AddToWorklist(Tmp.getNode());
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
Tmp, N0.getOperand(1));
}
@@ -7213,8 +7391,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -7231,6 +7408,43 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7240,24 +7454,36 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (FoldedVOp.getNode()) return FoldedVOp;
}
+ // Constant fold FNEG.
+ if (isa<ConstantFPSDNode>(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
- // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
- if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
- !VT.isVector() &&
- N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger()) {
+ if (!TLI.isFNegFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x80... per scalar element
+ // and splat it.
+ SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x80...
+ SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ }
Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
- AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- VT, Int);
+ DAG.getConstant(SignMask, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
}
@@ -7279,45 +7505,50 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
-
- // fold (fceil c1) -> fceil(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
-
- return SDValue();
-}
+ SDValue N1 = N->getOperand(1);
+ const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ }
- // fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+ if (N0CFP) {
+ EVT VT = N->getValueType(0);
+ // Canonicalize to constant on RHS.
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+ }
return SDValue();
}
-SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
+ SDValue N1 = N->getOperand(1);
+ const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- // fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ }
+
+ if (N0CFP) {
+ EVT VT = N->getValueType(0);
+ // Canonicalize to constant on RHS.
+ return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+ }
return SDValue();
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
@@ -7326,30 +7557,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
}
// fold (fabs c1) -> fabs(c1)
- if (N0CFP)
+ if (isa<ConstantFPSDNode>(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
return N->getOperand(0);
+
// fold (fabs (fneg x)) -> (fabs x)
// fold (fabs (fcopysign x, y)) -> (fabs x)
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
// constant pool values.
if (!TLI.isFAbsFree(VT) &&
- N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger() &&
- !N0.getOperand(0).getValueType().isVector()) {
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x7f... per scalar element
+ // and splat it.
+ SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x7f...
+ SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+ }
Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
- AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- N->getValueType(0), Int);
+ DAG.getConstant(SignMask, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
}
@@ -7429,15 +7670,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// will convert it back to (X & C1) >> C2.
CombineTo(N, NewBRCond, false);
// Truncate is dead.
- if (Trunc) {
- removeFromWorkList(Trunc);
- DAG.DeleteNode(Trunc);
- }
+ if (Trunc)
+ deleteAndRecombine(Trunc);
// Replace the uses of SRL with SETCC
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- removeFromWorkList(N1.getNode());
- DAG.DeleteNode(N1.getNode());
+ deleteAndRecombine(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -7464,10 +7702,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
dbgs() << "\nWith: ";
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- removeFromWorkList(TheXor);
- DAG.DeleteNode(TheXor);
+ deleteAndRecombine(TheXor);
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, Tmp, N2);
}
@@ -7495,10 +7732,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- removeFromWorkList(N1.getNode());
- DAG.DeleteNode(N1.getNode());
+ deleteAndRecombine(N1.getNode());
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, SetCC, N2);
}
@@ -7523,7 +7759,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), SDLoc(N),
false);
- if (Simp.getNode()) AddToWorkList(Simp.getNode());
+ if (Simp.getNode()) AddToWorklist(Simp.getNode());
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
@@ -7535,9 +7771,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
-/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
-/// uses N as its base pointer and that N may be folded in the load / store
-/// addressing mode.
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -7576,12 +7811,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
}
-/// CombineToPreIndexedLoadStore - Try turning a load / store into a
-/// pre-indexed load / store when the base pointer is an add or subtract
-/// and it has other uses besides the load / store. After the
-/// transformation, the new indexed load / store has effectively folded
-/// the add / subtract in and all of its other uses are redirected to the
-/// new load / store.
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
@@ -7733,7 +7967,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7742,7 +7976,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
if (Swapped)
std::swap(BasePtr, Offset);
@@ -7792,23 +8026,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
SDLoc(OtherUses[i]),
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
- removeFromWorkList(OtherUses[i]);
- DAG.DeleteNode(OtherUses[i]);
+ deleteAndRecombine(OtherUses[i]);
}
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
- removeFromWorkList(Ptr.getNode());
- DAG.DeleteNode(Ptr.getNode());
+ deleteAndRecombine(Ptr.getNode());
return true;
}
-/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
-/// add / sub of the base pointer node into a post-indexed load / store.
-/// The transformation folded the add / subtract into the new indexed
-/// load / store effectively and all of its uses are redirected to the
-/// new load / store.
+/// Try to combine a load/store with a add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation folded the add/subtract into the
+/// new indexed load/store effectively and all of its uses are redirected to the
+/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
@@ -7903,7 +8134,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7912,13 +8143,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
}
// Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
Result.getValue(isLoad ? 1 : 0));
- removeFromWorkList(Op);
- DAG.DeleteNode(Op);
+ deleteAndRecombine(Op);
return true;
}
}
@@ -7927,6 +8157,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
+/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+
+ // Some backends use TargetConstants for load offsets, but don't expect
+ // TargetConstants in general ADD nodes. We can convert these constants into
+ // regular Constants (if the constant is not opaque).
+ assert((Inc.getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+ "Cannot split out indexing using opaque target constants");
+ if (Inc.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ ConstInc->getValueType(0));
+ }
+
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -7950,33 +8204,46 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\nWith chain: ";
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
- if (N->use_empty()) {
- removeFromWorkList(N);
- DAG.DeleteNode(N);
- }
+ if (N->use_empty())
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+ if (!N->hasAnyUseOfValue(0) &&
+ ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
- removeFromWorkList(N);
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8004,15 +8271,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), Align,
- LD->getTBAAInfo());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), Align, LD->getAAInfo());
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -8042,7 +8309,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
MVT::Other, Chain, ReplLoad.getValue(1));
// Make sure the new and old chains are cleaned up.
- AddToWorkList(Token.getNode());
+ AddToWorklist(Token.getNode());
// Replace uses with load result and token factor. Don't add users
// to work list.
@@ -8342,7 +8609,7 @@ struct LoadedSlice {
// At this point, we know that we perform a cross-register-bank copy.
// Check if it is expensive.
- const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
// Assume bitcasts are cheap, unless both register classes do not
// explicitly share a common sub class.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
@@ -8606,9 +8873,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return true;
}
-/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
-/// load is having specific bytes cleared out. If so, return the byte size
-/// being masked out and the shift amount.
+/// Check to see if V is (and load (ptr), imm), where the load is having
+/// specific bytes cleared out. If so, return the byte size being masked out
+/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
@@ -8681,9 +8948,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
}
-/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
-/// provides a value as specified by MaskInfo. If so, replace the specified
-/// store with a narrower store of truncated IVal.
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
@@ -8738,10 +9005,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
}
-/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
-/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
-/// of the loaded bits, try narrowing the load and store if it would end up
-/// being a win for performance or code size.
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (ST->isVolatile())
@@ -8841,7 +9108,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), NewAlign,
- LD->getTBAAInfo());
+ LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
@@ -8849,10 +9116,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
- AddToWorkList(NewPtr.getNode());
- AddToWorkList(NewLD.getNode());
- AddToWorkList(NewVal.getNode());
- WorkListRemover DeadNodes(*this);
+ AddToWorklist(NewPtr.getNode());
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewVal.getNode());
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
@@ -8862,10 +9129,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
-/// TransformFPLoadStorePair - For a given floating point load / store pair,
-/// if the load value isn't used by any other operations, then consider
-/// transforming the pair to integer load / store operations if the target
-/// deems the transformation profitable.
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -8907,9 +9173,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
ST->getPointerInfo(),
false, false, STAlign);
- AddToWorkList(NewLD.getNode());
- AddToWorkList(NewST.getNode());
- WorkListRemover DeadNodes(*this);
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewST.getNode());
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
@@ -9039,7 +9305,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return false;
// Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 1);
+ SDValue Chain = SDValue(St, 0);
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
@@ -9070,7 +9336,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StoreSDNode *Index = St;
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
break;
// Find the base pointer and offset for this memory node.
@@ -9301,8 +9567,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Since we know that St is redundant, just iterate.
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- removeFromWorkList(St);
- DAG.DeleteNode(St);
+ deleteAndRecombine(St);
}
return true;
@@ -9361,6 +9626,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (LoadNodes.size() < 2)
return false;
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ St->getAlignment() >= RequiredAlignment)
+ return false;
+
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. These variables hold the index in the store node
// array.
@@ -9476,8 +9748,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- removeFromWorkList(St);
- DAG.DeleteNode(St);
+ deleteAndRecombine(St);
}
return true;
@@ -9503,7 +9774,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign,
- ST->getTBAAInfo());
+ ST->getAAInfo());
}
// Turn 'store undef, Ptr' -> nothing.
@@ -9557,19 +9828,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
- ST->getAlignment(), TBAAInfo);
+ ST->getAlignment(), AAInfo);
Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
- Alignment, TBAAInfo);
+ Alignment, AAInfo);
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
St0, St1);
}
@@ -9586,7 +9857,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
- ST->getTBAAInfo());
+ ST->getAAInfo());
}
}
@@ -9596,8 +9867,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (NewST.getNode())
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -9625,7 +9896,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
MVT::Other, Chain, ReplStore);
// Make sure the new and old chains are cleaned up.
- AddToWorkList(Token.getNode());
+ AddToWorklist(Token.getNode());
// Don't add users to work list.
return CombineTo(N, Token, false);
@@ -9647,7 +9918,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
APInt::getLowBitsSet(
Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getScalarType().getSizeInBits()));
- AddToWorkList(Value.getNode());
+ AddToWorklist(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
Ptr, ST->getMemoryVT(), ST->getMemOperand());
@@ -9674,6 +9945,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
+ ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
+ ST1->isUnindexed() && !ST1->isVolatile()) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
@@ -9741,7 +10023,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
InVec.getOperand(0), InVal, EltNo);
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
}
@@ -9829,32 +10111,32 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
- NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
- OriginalLoad->isNonTemporal(), Align,
- OriginalLoad->getTBAAInfo());
+ Load = DAG.getExtLoad(
+ ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
+ VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(
VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
- OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
else
Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
}
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
+ AddToWorklist(Load.getNode());
+ AddUsersToWorklist(Load.getNode()); // Add users too
// Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(EVE);
+ AddToWorklist(EVE);
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -9952,7 +10234,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode())) {
+ ISD::isNormalLoad(InVec.getNode()) &&
+ !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
SDValue Index = N->getOperand(1);
if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
@@ -10135,7 +10418,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
- AddToWorkList(BV.getNode());
+ AddToWorklist(BV.getNode());
// Bitcast to the desired type.
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -10201,7 +10484,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
Opnds.push_back(In.getOperand(0));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
- AddToWorkList(BV.getNode());
+ AddToWorklist(BV.getNode());
return DAG.getNode(Opcode, dl, VT, BV);
}
@@ -10227,9 +10510,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
// May only combine to shuffle after legalize if shuffle is legal.
- if (LegalOperations &&
- !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
SDValue VecIn1, VecIn2;
@@ -10319,10 +10605,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
VecIn1.getValueType() != VT)
return SDValue();
- // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
- if (!isTypeLegal(VT))
- return SDValue();
-
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
@@ -10513,6 +10795,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
+static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
+ SDValue V, SelectionDAG &DAG) {
+ SDLoc DL(V);
+ EVT VT = V.getValueType();
+
+ switch (V.getOpcode()) {
+ default:
+ return V;
+
+ case ISD::CONCAT_VECTORS: {
+ EVT OpVT = V->getOperand(0).getValueType();
+ int OpSize = OpVT.getVectorNumElements();
+ SmallBitVector OpUsedElements(OpSize, false);
+ bool FoundSimplification = false;
+ SmallVector<SDValue, 4> NewOps;
+ NewOps.reserve(V->getNumOperands());
+ for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
+ SDValue Op = V->getOperand(i);
+ bool OpUsed = false;
+ for (int j = 0; j < OpSize; ++j)
+ if (UsedElements[i * OpSize + j]) {
+ OpUsedElements[j] = true;
+ OpUsed = true;
+ }
+ NewOps.push_back(
+ OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
+ : DAG.getUNDEF(OpVT));
+ FoundSimplification |= Op == NewOps.back();
+ OpUsedElements.reset();
+ }
+ if (FoundSimplification)
+ V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
+ return V;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue BaseV = V->getOperand(0);
+ SDValue SubV = V->getOperand(1);
+ auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ if (!IdxN)
+ return V;
+
+ int SubSize = SubV.getValueType().getVectorNumElements();
+ int Idx = IdxN->getZExtValue();
+ bool SubVectorUsed = false;
+ SmallBitVector SubUsedElements(SubSize, false);
+ for (int i = 0; i < SubSize; ++i)
+ if (UsedElements[i + Idx]) {
+ SubVectorUsed = true;
+ SubUsedElements[i] = true;
+ UsedElements[i + Idx] = false;
+ }
+
+ // Now recurse on both the base and sub vectors.
+ SDValue SimplifiedSubV =
+ SubVectorUsed
+ ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
+ : DAG.getUNDEF(SubV.getValueType());
+ SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
+ if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
+ return V;
+ }
+ }
+}
+
+static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
+ SDValue N1, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
+ for (int M : SVN->getMask())
+ if (M >= 0 && M < NumElts)
+ N0UsedElements[M] = true;
+ else if (M >= NumElts)
+ N1UsedElements[M - NumElts] = true;
+
+ SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
+ SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
+ if (S0 == N0 && S1 == N1)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
+}
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -10665,6 +11033,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // There are various patterns used to build up a vector from smaller vectors,
+ // subvectors, or elements. Scan chains of these and replace unused insertions
+ // or components with undef.
+ if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
+ return S;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.getOpcode() == ISD::UNDEF ||
@@ -10699,7 +11073,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Idx = OtherSV->getMaskElt(Idx);
Mask.push_back(Idx);
}
-
+
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
bool CommuteOperands = false;
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
// To be valid, the combine shuffle mask should only reference elements
@@ -10738,12 +11120,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// The combined shuffle must map each index to itself.
IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
}
-
+
if (IsIdentityMask) {
if (CommuteOperands)
// optimize shuffle(shuffle(x, y), undef) -> y.
return OtherSV->getOperand(1);
-
+
// optimize shuffle(shuffle(x, undef), undef) -> x
// optimize shuffle(shuffle(x, y), undef) -> x
return OtherSV->getOperand(0);
@@ -10751,16 +11133,134 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// It may still be beneficial to combine the two shuffles if the
// resulting shuffle is legal.
+ if (TLI.isTypeLegal(VT)) {
+ if (!CommuteOperands) {
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+ } else {
+ // Compute the commuted shuffle mask.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
+
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
+ &Mask[0]);
+ }
+ }
+ }
+
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ TLI.isTypeLegal(VT)) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so that next rule
+ // will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ // Don't try to fold shuffles with illegal type.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = OtherSV->getOperand(0);
+ SDValue SV1 = OtherSV->getOperand(1);
+ bool HasSameOp0 = N1 == SV0;
+ bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+ if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
+ // Early exit.
+ return SDValue();
+
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ if (Idx < (int)NumElts) {
+ Idx = OtherSV->getMaskElt(Idx);
+ if (IsSV1Undef && Idx >= (int) NumElts)
+ Idx = -1; // Propagate Undef.
+ } else
+ Idx = HasSameOp0 ? Idx - NumElts : Idx;
+
+ Mask.push_back(Idx);
+ }
+
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
+ // Avoid introducing shuffles with illegal mask.
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (IsSV1Undef)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+ }
+
+ // Compute the commuted shuffle mask.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
+
if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (!CommuteOperands)
- // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
- &Mask[0]);
-
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
- &Mask[0]);
+ if (IsSV1Undef)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
+ // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
}
}
@@ -10794,8 +11294,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return SDValue();
}
-/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
-/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
+/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
@@ -10817,7 +11317,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts);
+ Indices.push_back(NumElts+i);
else
return SDValue();
}
@@ -10841,7 +11341,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
-/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
@@ -10896,7 +11396,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
- AddToWorkList(FoldOp.getNode());
+ AddToWorklist(FoldOp.getNode());
}
if (Ops.size() == LHS.getNumOperands())
@@ -10918,7 +11418,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
LHS.getOperand(0), RHS.getOperand(0));
- AddUsersToWorkList(N);
+ AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
}
@@ -10927,7 +11427,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+/// Visit a binary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVUnaryOp only works on vectors!");
@@ -10950,7 +11450,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
- AddToWorkList(FoldOp.getNode());
+ AddToWorklist(FoldOp.getNode());
}
if (Ops.size() != N0.getNumOperands())
@@ -10977,9 +11477,9 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
SCC.getOperand(4));
- AddToWorkList(SETCC.getNode());
- return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
- SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ AddToWorklist(SETCC.getNode());
+ return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
}
return SCC;
@@ -10987,12 +11487,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
return SDValue();
}
-/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
-/// are the two values being selected between, see if we can simplify the
-/// select. Callers of this should assume that TheSelect is deleted if this
-/// returns true. As such, they should return the appropriate thing (e.g. the
-/// node) back to the top-level of the DAG combiner loop to avoid it being
-/// looked at.
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
@@ -11071,22 +11570,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
}
SDValue Load;
+ // It is safe to replace the two loads if they have different alignments,
+ // but the new load must be the minimum (most restrictive) alignment of the
+ // inputs.
+ bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
+ unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
Load = DAG.getLoad(TheSelect->getValueType(0),
SDLoc(TheSelect),
- // FIXME: Discards pointer and TBAA info.
+ // FIXME: Discards pointer and AA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->isInvariant(), LLD->getAlignment());
+ isInvariant, Alignment);
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
SDLoc(TheSelect),
TheSelect->getValueType(0),
- // FIXME: Discards pointer and TBAA info.
+ // FIXME: Discards pointer and AA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->getMemoryVT(), LLD->isVolatile(),
- LLD->isNonTemporal(), LLD->getAlignment());
+ LLD->isNonTemporal(), isInvariant, Alignment);
}
// Users of the select now use the result of the load.
@@ -11102,7 +11606,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
return false;
}
-/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3,
@@ -11118,7 +11622,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
- if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ if (SCC.getNode()) AddToWorklist(SCC.getNode());
ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
// fold select_cc true, x, y -> x
@@ -11186,13 +11690,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
N0, N1, CC);
- AddToWorkList(Cond.getNode());
+ AddToWorklist(Cond.getNode());
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
- AddToWorkList(CstOffset.getNode());
+ AddToWorklist(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
- AddToWorkList(CPIdx.getNode());
+ AddToWorklist(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
@@ -11217,11 +11721,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -11231,11 +11735,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -11305,8 +11809,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
N2.getValueType(), SCC);
}
- AddToWorkList(SCC.getNode());
- AddToWorkList(Temp.getNode());
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
if (N2C->getAPIntValue() == 1)
return Temp;
@@ -11385,8 +11889,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
getShiftAmountTy(N0.getValueType())));
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
XType, N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
}
}
@@ -11394,7 +11898,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return SDValue();
}
-/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans) {
@@ -11403,10 +11907,10 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
@@ -11421,14 +11925,33 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
for (SDNode *N : Built)
- AddToWorkList(N);
+ AddToWorklist(N);
return S;
}
-/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (!C->getAPIntValue())
+ return SDValue();
+
+ std::vector<SDNode *> Built;
+ SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
@@ -11443,13 +11966,145 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
for (SDNode *N : Built)
- AddToWorkList(N);
+ AddToWorklist(N);
return S;
}
-/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as
-// results.
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+
+ unsigned Iterations = 0;
+ if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
+ if (Iterations) {
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue FPOne = DAG.getConstantFP(1.0, VT);
+
+ AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ AddToWorklist(Est.getNode());
+ }
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
+SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+
+ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ AddToWorklist(Est.getNode());
+ }
+ return Est;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
+SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+
+ // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ AddToWorklist(HalfEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ AddToWorklist(Est.getNode());
+ }
+ return Est;
+}
+
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ unsigned Iterations = 0;
+ bool UseOneConstNR = false;
+ if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ AddToWorklist(Est.getNode());
+ if (Iterations) {
+ Est = UseOneConstNR ?
+ BuildRsqrtNROneConst(Op, Est, Iterations) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Return true if base is a frame index, which is known not to alias with
+/// anything but itself. Provides base object and offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
@@ -11485,8 +12140,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
return isa<FrameIndexSDNode>(Base);
}
-/// isAlias - Return true if there is any possibility that the two addresses
-/// overlap.
+/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// If they are the same then they must be aliases.
if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
@@ -11545,8 +12199,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return false;
}
- bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
+ ? CombinerGlobalAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -11564,10 +12219,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
AliasAnalysis::AliasResult AAResult =
AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
Overlap1,
- UseTBAA ? Op0->getTBAAInfo() : nullptr),
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
Overlap2,
- UseTBAA ? Op1->getTBAAInfo() : nullptr));
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -11576,7 +12231,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return true;
}
-/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases) {
@@ -11612,7 +12267,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
// Don't bother if we've been before.
- if (!Visited.insert(Chain.getNode()))
+ if (!Visited.insert(Chain.getNode()).second)
continue;
switch (Chain.getOpcode()) {
@@ -11687,10 +12342,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// like register copies will interfere with trivial cases).
SmallVector<const SDNode *, 16> Worklist;
- for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
- IE = Visited.end(); I != IE; ++I)
- if (*I != OriginalChain.getNode())
- Worklist.push_back(*I);
+ for (const SDNode *N : Visited)
+ if (N != OriginalChain.getNode())
+ Worklist.push_back(N);
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
@@ -11701,7 +12355,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
for (SDNode::use_iterator UI = M->use_begin(),
UIE = M->use_end(); UI != UIE; ++UI)
- if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+ if (UI.getUse().getValueType() == MVT::Other &&
+ Visited.insert(*UI).second) {
if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
// We've not visited this use, and we care about it (it could have an
// ordering dependency with the original node).
@@ -11717,8 +12372,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
}
-/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
-/// for a better chain (aliasing node.)
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
@@ -11737,11 +12392,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
-// SelectionDAG::Combine - This is the entry point for the file.
-//
+/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
- /// run - This is the main entry point to this class.
- ///
+ /// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 445572a..8facbc2 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
@@ -64,19 +65,32 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "isel"
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
- "target-independent selector");
+ "target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
- "target-specific selector");
+ "target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
-/// startNewBlock - Set the current block to which generated machine
-/// instructions will be appended, and clear the local CSE map.
-///
+void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ Alignment = CS->getParamAlignment(AttrIdx);
+}
+
+/// Set the current block to which generated machine instructions will be
+/// appended, and clear the local CSE map.
void FastISel::startNewBlock() {
LocalValueMap.clear();
@@ -89,18 +103,19 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
-bool FastISel::LowerArguments() {
+bool FastISel::lowerArguments() {
if (!FuncInfo.CanLowerReturn)
// Fallback to SDISel argument lowering code to deal with sret pointer
// parameter.
return false;
- if (!FastLowerArguments())
+ if (!fastLowerArguments())
return false;
// Enter arguments into ValueMap for uses in non-entry BBs.
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
- E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ E = FuncInfo.Fn->arg_end();
+ I != E; ++I) {
DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
FuncInfo.ValueMap[I] = VI->second;
@@ -112,22 +127,30 @@ void FastISel::flushLocalValueMap() {
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
recomputeInsertPt();
+ SavedInsertPt = FuncInfo.InsertPt;
}
-bool FastISel::hasTrivialKill(const Value *V) const {
+bool FastISel::hasTrivialKill(const Value *V) {
// Don't consider constants or arguments to have trivial kills.
const Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return false;
// No-op casts are trivially coalesced by fast-isel.
- if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (const auto *Cast = dyn_cast<CastInst>(I))
if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // Even the value might have only one use in the LLVM IR, it is possible that
+ // FastISel might fold the use into another instruction and now there is more
+ // than one use at the Machine Instruction level.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg && !MRI.use_empty(Reg))
+ return false;
+
// GEPs with all zero indices are trivially coalesced by fast-isel.
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
return false;
@@ -160,7 +183,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
// Look up the value to see if we already have a register for it.
unsigned Reg = lookUpRegForValue(V);
- if (Reg != 0)
+ if (Reg)
return Reg;
// In bottom-up mode, just create the virtual register which will be used
@@ -181,29 +204,24 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-/// materializeRegForValue - Helper for getRegForValue. This function is
-/// called when the value isn't already available in a register and must
-/// be materialized with new instructions.
-unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
unsigned Reg = 0;
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().getActiveBits() <= 64)
- Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
- } else if (isa<AllocaInst>(V)) {
- Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
- } else if (isa<ConstantPointerNull>(V)) {
+ Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V))
+ Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
+ else if (isa<ConstantPointerNull>(V))
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
- Reg =
- getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext())));
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- if (CF->isNullValue()) {
- Reg = TargetMaterializeFloatZero(CF);
- } else {
+ Reg = getRegForValue(
+ Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue())
+ Reg = fastMaterializeFloatZero(CF);
+ else
// Try to emit the constant directly.
- Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
- }
+ Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
if (!Reg) {
// Try to emit the constant by using an integer constant with a cast.
@@ -213,22 +231,22 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
- (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
unsigned IntegerReg =
- getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
if (IntegerReg != 0)
- Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
- IntegerReg, /*Kill=*/false);
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
+ /*Kill=*/false);
}
}
- } else if (const Operator *Op = dyn_cast<Operator>(V)) {
- if (!SelectOperator(Op, Op->getOpcode()))
+ } else if (const auto *Op = dyn_cast<Operator>(V)) {
+ if (!selectOperator(Op, Op->getOpcode()))
if (!isa<Instruction>(Op) ||
- !TargetSelectInstruction(cast<Instruction>(Op)))
+ !fastSelectInstruction(cast<Instruction>(Op)))
return 0;
Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
@@ -236,15 +254,26 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
+ return Reg;
+}
+
+/// Helper for getRegForValue. This function is called when the value isn't
+/// already available in a register and must be materialized with new
+/// instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ // Give the target-specific code a try first.
+ if (isa<Constant>(V))
+ Reg = fastMaterializeConstant(cast<Constant>(V));
- // If target-independent code couldn't handle the value, give target-specific
- // code a try.
- if (!Reg && isa<Constant>(V))
- Reg = TargetMaterializeConstant(cast<Constant>(V));
+ // If target-specific code couldn't or didn't want to handle the value, then
+ // give target-independent code a try.
+ if (!Reg)
+ Reg = materializeConstant(V, VT);
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
- if (Reg != 0) {
+ if (Reg) {
LocalValueMap[V] = Reg;
LastLocalValue = MRI.getVRegDef(Reg);
}
@@ -262,13 +291,7 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
return LocalValueMap[V];
}
-/// UpdateValueMap - Update the value map to include the new mapping for this
-/// instruction, or insert an extra copy to get the result in a previous
-/// determined register.
-/// NOTE: This is only necessary because we might select a block that uses
-/// a value before we select the block that defines the value. It might be
-/// possible to fix this by selecting blocks in reverse postorder.
-void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return;
@@ -281,7 +304,7 @@ void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
for (unsigned i = 0; i < NumRegs; i++)
- FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+ FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
AssignedReg = Reg;
}
@@ -299,13 +322,12 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
MVT PtrVT = TLI.getPointerTy();
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
- IdxN, IdxNIsKill);
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
+ IdxNIsKill);
IdxNIsKill = true;
- }
- else if (IdxVT.bitsGT(PtrVT)) {
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
- IdxN, IdxNIsKill);
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN =
+ fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
IdxNIsKill = true;
}
return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
@@ -327,7 +349,7 @@ void FastISel::recomputeInsertPt() {
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
- assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!");
while (I != E) {
MachineInstr *Dead = &*I;
++I;
@@ -342,7 +364,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() {
DebugLoc OldDL = DbgLoc;
recomputeInsertPt();
DbgLoc = DebugLoc();
- SavePoint SP = { OldInsertPt, OldDL };
+ SavePoint SP = {OldInsertPt, OldDL};
return SP;
}
@@ -355,10 +377,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
DbgLoc = OldInsertPt.DL;
}
-/// SelectBinaryOp - Select and emit code for a binary operator instruction,
-/// which has an opcode which directly corresponds to the given ISD opcode.
-///
-bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -371,9 +390,8 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
if (!TLI.isTypeLegal(VT)) {
// MVT::i1 is special. Allow AND, OR, or XOR because they
// don't require additional zeroing, which makes them easy.
- if (VT == MVT::i1 &&
- (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
- ISDOpcode == ISD::XOR))
+ if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
VT = TLI.getTypeToTransformTo(I->getContext(), VT);
else
return false;
@@ -381,38 +399,36 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the first operand is a constant, and handle it as "ri". At -O0,
// we don't have anything that canonicalizes operand order.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
unsigned Op1 = getRegForValue(I->getOperand(1));
- if (Op1 == 0) return false;
-
+ if (!Op1)
+ return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
- Op1IsKill, CI->getZExtValue(),
- VT.getSimpleVT());
- if (ResultReg == 0) return false;
+ unsigned ResultReg =
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
+ CI->getZExtValue(), VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// Check if the second operand is a constant and handle it appropriately.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t Imm = CI->getZExtValue();
// Transform "sdiv exact X, 8" -> "sra X, 3".
if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
- cast<BinaryOperator>(I)->isExact() &&
- isPowerOf2_64(Imm)) {
+ cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
Imm = Log2_64(Imm);
ISDOpcode = ISD::SRA;
}
@@ -424,54 +440,49 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::AND;
}
- unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
- if (ResultReg == 0) return false;
+ if (!ResultReg)
+ return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// Check if the second operand is a constant float.
- if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
- unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
ISDOpcode, Op0, Op0IsKill, CF);
- if (ResultReg != 0) {
+ if (ResultReg) {
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
unsigned Op1 = getRegForValue(I->getOperand(1));
- if (Op1 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
// Now we have both operands in registers. Emit the instruction.
- unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode,
- Op0, Op0IsKill,
- Op1, Op1IsKill);
- if (ResultReg == 0)
+ unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
+ if (!ResultReg)
// Target-specific code wasn't able to find a machine opcode for
// the given ISD opcode and type. Halt "fast" selection and bail.
return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool FastISel::SelectGetElementPtr(const User *I) {
+bool FastISel::selectGetElementPtr(const User *I) {
unsigned N = getRegForValue(I->getOperand(0));
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool NIsKill = hasTrivialKill(I->getOperand(0));
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
@@ -481,18 +492,18 @@ bool FastISel::SelectGetElementPtr(const User *I) {
uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
- for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
- E = I->op_end(); OI != E; ++OI) {
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1,
+ E = I->op_end();
+ OI != E; ++OI) {
const Value *Idx = *OI;
- if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ if (auto *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
if (TotalOffs >= MaxOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -503,15 +514,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->isZero()) continue;
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
// N = N + Offset
TotalOffs +=
- DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -519,9 +530,8 @@ bool FastISel::SelectGetElementPtr(const User *I) {
continue;
}
if (TotalOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -532,43 +542,37 @@ bool FastISel::SelectGetElementPtr(const User *I) {
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
unsigned IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
- if (IdxN == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
if (ElementSize != 1) {
- IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
- if (IdxN == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
IdxNIsKill = true;
}
- N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
}
if (TotalOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, N);
+ updateValueMap(I, N);
return true;
}
-/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands
-/// to a stackmap or patchpoint machine instruction.
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
- if (auto *C = dyn_cast<ConstantInt>(Val)) {
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
} else if (isa<ConstantPointerNull>(Val)) {
@@ -585,16 +589,15 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
return false;
} else {
unsigned Reg = getRegForValue(Val);
- if (Reg == 0)
+ if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
}
}
-
return true;
}
-bool FastISel::SelectStackmap(const CallInst *I) {
+bool FastISel::selectStackmap(const CallInst *I) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
@@ -621,7 +624,7 @@ bool FastISel::SelectStackmap(const CallInst *I) {
assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
"Expected a constant integer.");
const auto *NumBytes =
- cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
// Push live variables for the stack map (skipping the first two arguments
@@ -637,13 +640,13 @@ bool FastISel::SelectStackmap(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(0);
+ .addImm(0);
// Issue STACKMAP.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -654,7 +657,8 @@ bool FastISel::SelectStackmap(const CallInst *I) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(0).addImm(0);
+ .addImm(0)
+ .addImm(0);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo()->setHasStackMap();
@@ -662,11 +666,370 @@ bool FastISel::SelectStackmap(const CallInst *I) {
return true;
}
-bool FastISel::SelectCall(const User *I) {
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
+ unsigned NumArgs, const Value *Callee,
+ bool ForceRetVoidTy, CallLoweringInfo &CLI) {
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ ImmutableCallSite CS(CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
+ : CI->getType();
+ CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectPatchpoint(const CallInst *I) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+ CallingConv::ID CC = I->getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !I->getType()->isVoidTy();
+ Value *Callee = I->getOperand(PatchPointOpers::TargetPos);
+
+ // Get the real number of arguments participating in the call <numArgs>
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
+ "Expected a constant integer.");
+ const auto *NumArgsVal =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = NumArgsVal->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // This includes all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ CallLoweringInfo CLI;
+ if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
+ return false;
+
+ assert(CLI.Call && "No call instruction specified.");
+
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add an explicit result reg if we use the anyreg calling convention.
+ if (IsAnyRegCC && HasDef) {
+ assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ CLI.NumResultRegs = 1;
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+ }
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Assume that the callee is a constant address or null pointer.
+ // FIXME: handle function symbols in the future.
+ uint64_t CalleeAddr;
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee))
+ CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr)
+ CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ else
+ llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (isa<ConstantPointerNull>(Callee))
+ CalleeAddr = 0;
+ else
+ llvm_unreachable("Unsupported callee address.");
+
+ Ops.push_back(MachineOperand::CreateImm(CalleeAddr));
+
+ // Adjust <numArgs> to account for any arguments that have been passed on
+ // the stack instead.
+ unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
+ Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
+
+ // Add the calling convention
+ Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC) {
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
+ unsigned Reg = getRegForValue(I->getArgOperand(i));
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+
+ // Push the arguments from the call instruction.
+ for (auto Reg : CLI.OutRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+
+ // Push live variables for the stack map.
+ if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
+ return false;
+
+ // Push the register mask info.
+ Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC)));
+
+ // Add scratch registers as implicit def and early clobber.
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Add implicit defs (return values).
+ for (auto Reg : CLI.InRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
+ /*IsImpl=*/true));
+
+ // Insert the patchpoint instruction before the call generated by the target.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ TII.get(TargetOpcode::PATCHPOINT));
+
+ for (auto &MO : Ops)
+ MIB.addOperand(MO);
+
+ MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ // Delete the original call instruction.
+ CLI.Call->eraseFromParent();
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+
+ if (CLI.NumResultRegs)
+ updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+ return true;
+}
+
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+ unsigned NumArgs) {
+ ImmutableCallSite CS(CI);
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, ArgI + 1);
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+ // Handle the incoming return values from the call.
+ CLI.clearIns();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, CLI.RetTy, RetTys);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(
+ CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ // FIXME: sret demotion isn't supported yet - bail out.
+ if (!CanLowerReturn)
+ return false;
+
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.clearOuts();
+ for (auto &Arg : CLI.getArgs()) {
+ Type *FinalType = Arg.Ty;
+ if (Arg.IsByVal)
+ FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+
+ ISD::ArgFlagsTy Flags;
+ if (Arg.IsZExt)
+ Flags.setZExt();
+ if (Arg.IsSExt)
+ Flags.setSExt();
+ if (Arg.IsInReg)
+ Flags.setInReg();
+ if (Arg.IsSRet)
+ Flags.setSRet();
+ if (Arg.IsByVal)
+ Flags.setByVal();
+ if (Arg.IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling in
+ // the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca) {
+ PointerType *Ty = cast<PointerType>(Arg.Ty);
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
+ // For ByVal, alignment should come from FE. BE will guess if this info is
+ // not there, but there are cases it cannot get right.
+ unsigned FrameAlign = Arg.Alignment;
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Arg.IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ CLI.OutVals.push_back(Arg.Val);
+ CLI.OutFlags.push_back(Flags);
+ }
+
+ if (!fastLowerCall(CLI))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ assert(CLI.Call && "No call instruction specified.");
+ CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ if (CLI.NumResultRegs && CLI.CS)
+ updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+
+ return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+ ImmutableCallSite CS(CI);
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FuncTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FuncTy->getReturnType();
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within fastLowerCall.
+ bool IsTailCall = CI->isTailCall();
+ if (IsTailCall && !isInTailCallPosition(CS, TM))
+ IsTailCall = false;
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ .setTailCall(IsTailCall);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
// Handle simple inline asms.
if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // If the inline asm has side effects, then make sure that no local value
+ // lives across by flushing the local value map.
+ if (IA->hasSideEffects())
+ flushLocalValueMap();
+
// Don't attempt to handle constraints.
if (!IA->getConstraintString().empty())
return false;
@@ -679,34 +1042,46 @@ bool FastISel::SelectCall(const User *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
- .addExternalSymbol(IA->getAsmString().c_str())
- .addImm(ExtraInfo);
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
return true;
}
MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
ComputeUsesVAFloatArgument(*Call, &MMI);
- const Function *F = Call->getCalledFunction();
- if (!F) return false;
+ // Handle intrinsic function calls.
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+ return selectIntrinsicCall(II);
+
+ // Usually, it does not make sense to initialize a value,
+ // make an unrelated function call and use the value, because
+ // it tends to be spilled on the stack. So, we move the pointer
+ // to the last local value to the beginning of the block, so that
+ // all the values which have already been materialized,
+ // appear after the call. It also makes sense to skip intrinsics
+ // since they tend to be inlined.
+ flushLocalValueMap();
- // Handle selected intrinsic function calls.
- switch (F->getIntrinsicID()) {
- default: break;
- // At -O0 we don't care about the lifetime intrinsics.
+ return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // At -O0 we don't care about the lifetime intrinsics.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
- // The donothing intrinsic does, well, nothing.
+ // The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
-
case Intrinsic::dbg_declare: {
- const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
DIVariable DIVar(DI->getVariable());
assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar ||
- !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) {
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
@@ -719,11 +1094,11 @@ bool FastISel::SelectCall(const User *I) {
unsigned Offset = 0;
Optional<MachineOperand> Op;
- if (const Argument *Arg = dyn_cast<Argument>(Address))
+ if (const auto *Arg = dyn_cast<Argument>(Address))
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Op = MachineOperand::CreateFI(Offset);
+ Op = MachineOperand::CreateFI(Offset);
if (!Op)
if (unsigned Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -750,13 +1125,14 @@ bool FastISel::SelectCall(const User *I) {
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
- DI->getVariable());
+ DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
.addOperand(*Op)
.addImm(0)
- .addMetadata(DI->getVariable());
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -766,33 +1142,41 @@ bool FastISel::SelectCall(const User *I) {
}
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
- const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(0U).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ .addReg(0U)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addCImm(CI).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
+ .addCImm(CI)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addImm(CI->getZExtValue()).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ .addImm(CI->getZExtValue())
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addFPImm(CF).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
+ .addFPImm(CF)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = DI->getOffset() != 0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect,
- Reg, DI->getOffset(), DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
+ DI->getOffset(), DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -801,46 +1185,38 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
- Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ Constant *ResCI = ConstantInt::get(II->getType(), Res);
unsigned ResultReg = getRegForValue(ResCI);
- if (ResultReg == 0)
+ if (!ResultReg)
return false;
- UpdateValueMap(Call, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::expect: {
- unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
- if (ResultReg == 0)
+ unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ if (!ResultReg)
return false;
- UpdateValueMap(Call, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::experimental_stackmap:
- return SelectStackmap(Call);
+ return selectStackmap(II);
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return selectPatchpoint(II);
}
- // Usually, it does not make sense to initialize a value,
- // make an unrelated function call and use the value, because
- // it tends to be spilled on the stack. So, we move the pointer
- // to the last local value to the beginning of the block, so that
- // all the values which have already been materialized,
- // appear after the call. It also makes sense to skip intrinsics
- // since they tend to be inlined.
- if (!isa<IntrinsicInst>(Call))
- flushLocalValueMap();
-
- // An arbitrary call. Bail.
- return false;
+ return fastLowerIntrinsicCall(II);
}
-bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+bool FastISel::selectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
- if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
- DstVT == MVT::Other || !DstVT.isSimple())
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
+ !DstVT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -859,24 +1235,22 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
- unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
- DstVT.getSimpleVT(),
- Opcode,
- InputReg, InputRegIsKill);
+ unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Opcode, InputReg, InputRegIsKill);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool FastISel::SelectBitCast(const User *I) {
+bool FastISel::selectBitCast(const User *I) {
// If the bitcast doesn't change the type, just use the operand value.
if (I->getType() == I->getOperand(0)->getType()) {
unsigned Reg = getRegForValue(I->getOperand(0));
- if (Reg == 0)
+ if (!Reg)
return false;
- UpdateValueMap(I, Reg);
+ updateValueMap(I, Reg);
return true;
}
@@ -891,17 +1265,15 @@ bool FastISel::SelectBitCast(const User *I) {
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DstVT = DstEVT.getSimpleVT();
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT == DstVT) {
- const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -912,28 +1284,27 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
- ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+ ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool
-FastISel::SelectInstruction(const Instruction *I) {
+bool FastISel::selectInstruction(const Instruction *I) {
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I))
- if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ if (!handlePHINodesInSuccessorBlocks(I->getParent()))
return false;
DbgLoc = I->getDebugLoc();
- MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ SavedInsertPt = FuncInfo.InsertPt;
- if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ if (const auto *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
LibFunc::Func Func;
@@ -951,40 +1322,39 @@ FastISel::SelectInstruction(const Instruction *I) {
}
// First, try doing target-independent selection.
- if (SelectOperator(I, I->getOpcode())) {
- ++NumFastIselSuccessIndependent;
- DbgLoc = DebugLoc();
- return true;
- }
- // Remove dead code. However, ignore call instructions since we've flushed
- // the local value map and recomputed the insert point.
- if (!isa<CallInst>(I)) {
+ if (!SkipTargetIndependentISel) {
+ if (selectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
recomputeInsertPt();
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ SavedInsertPt = FuncInfo.InsertPt;
}
-
// Next, try calling the target to attempt to handle the instruction.
- SavedInsertPt = FuncInfo.InsertPt;
- if (TargetSelectInstruction(I)) {
+ if (fastSelectInstruction(I)) {
++NumFastIselSuccessTarget;
DbgLoc = DebugLoc();
return true;
}
- // Check for dead code and remove as necessary.
+ // Remove dead code.
recomputeInsertPt();
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DbgLoc = DebugLoc();
+ // Undo phi node updates, because they will be added again by SelectionDAG.
+ if (isa<TerminatorInst>(I))
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
-/// FastEmitBranch - Emit an unconditional branch to the given block,
-/// unless it is the immediate (fall-through) successor, and update
-/// the CFG.
-void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// For more accurate line information if this is the only instruction
@@ -1002,54 +1372,51 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
}
-/// SelectFNeg - Emit an FNeg operation.
-///
-bool
-FastISel::SelectFNeg(const User *I) {
+/// Emit an FNeg operation.
+bool FastISel::selectFNeg(const User *I) {
unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
- if (OpReg == 0) return false;
-
+ if (!OpReg)
+ return false;
bool OpRegIsKill = hasTrivialKill(I);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(I->getType());
- unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
- ISD::FNEG, OpReg, OpRegIsKill);
- if (ResultReg != 0) {
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ OpReg, OpRegIsKill);
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
return true;
}
// Bitcast the value to integer, twiddle the sign bit with xor,
// and then bitcast it back to floating-point.
- if (VT.getSizeInBits() > 64) return false;
+ if (VT.getSizeInBits() > 64)
+ return false;
EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
if (!TLI.isTypeLegal(IntVT))
return false;
- unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
ISD::BITCAST, OpReg, OpRegIsKill);
- if (IntReg == 0)
+ if (!IntReg)
return false;
- unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
- IntReg, /*Kill=*/true,
- UINT64_C(1) << (VT.getSizeInBits()-1),
- IntVT.getSimpleVT());
- if (IntResultReg == 0)
+ unsigned IntResultReg = fastEmit_ri_(
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
+ if (!IntResultReg)
return false;
- ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BITCAST, IntResultReg, /*Kill=*/true);
- if (ResultReg == 0)
+ ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
+ IntResultReg, /*IsKill=*/true);
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool
-FastISel::SelectExtractValue(const User *U) {
+bool FastISel::selectExtractValue(const User *U) {
const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
if (!EVI)
return false;
@@ -1085,55 +1452,54 @@ FastISel::SelectExtractValue(const User *U) {
for (unsigned i = 0; i < VTIndex; i++)
ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
- UpdateValueMap(EVI, ResultReg);
+ updateValueMap(EVI, ResultReg);
return true;
}
-bool
-FastISel::SelectOperator(const User *I, unsigned Opcode) {
+bool FastISel::selectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
case Instruction::Add:
- return SelectBinaryOp(I, ISD::ADD);
+ return selectBinaryOp(I, ISD::ADD);
case Instruction::FAdd:
- return SelectBinaryOp(I, ISD::FADD);
+ return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
- return SelectBinaryOp(I, ISD::SUB);
+ return selectBinaryOp(I, ISD::SUB);
case Instruction::FSub:
// FNeg is currently represented in LLVM IR as a special case of FSub.
if (BinaryOperator::isFNeg(I))
- return SelectFNeg(I);
- return SelectBinaryOp(I, ISD::FSUB);
+ return selectFNeg(I);
+ return selectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
- return SelectBinaryOp(I, ISD::MUL);
+ return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
- return SelectBinaryOp(I, ISD::FMUL);
+ return selectBinaryOp(I, ISD::FMUL);
case Instruction::SDiv:
- return SelectBinaryOp(I, ISD::SDIV);
+ return selectBinaryOp(I, ISD::SDIV);
case Instruction::UDiv:
- return SelectBinaryOp(I, ISD::UDIV);
+ return selectBinaryOp(I, ISD::UDIV);
case Instruction::FDiv:
- return SelectBinaryOp(I, ISD::FDIV);
+ return selectBinaryOp(I, ISD::FDIV);
case Instruction::SRem:
- return SelectBinaryOp(I, ISD::SREM);
+ return selectBinaryOp(I, ISD::SREM);
case Instruction::URem:
- return SelectBinaryOp(I, ISD::UREM);
+ return selectBinaryOp(I, ISD::UREM);
case Instruction::FRem:
- return SelectBinaryOp(I, ISD::FREM);
+ return selectBinaryOp(I, ISD::FREM);
case Instruction::Shl:
- return SelectBinaryOp(I, ISD::SHL);
+ return selectBinaryOp(I, ISD::SHL);
case Instruction::LShr:
- return SelectBinaryOp(I, ISD::SRL);
+ return selectBinaryOp(I, ISD::SRL);
case Instruction::AShr:
- return SelectBinaryOp(I, ISD::SRA);
+ return selectBinaryOp(I, ISD::SRA);
case Instruction::And:
- return SelectBinaryOp(I, ISD::AND);
+ return selectBinaryOp(I, ISD::AND);
case Instruction::Or:
- return SelectBinaryOp(I, ISD::OR);
+ return selectBinaryOp(I, ISD::OR);
case Instruction::Xor:
- return SelectBinaryOp(I, ISD::XOR);
+ return selectBinaryOp(I, ISD::XOR);
case Instruction::GetElementPtr:
- return SelectGetElementPtr(I);
+ return selectGetElementPtr(I);
case Instruction::Br: {
const BranchInst *BI = cast<BranchInst>(I);
@@ -1141,7 +1507,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
if (BI->isUnconditional()) {
const BasicBlock *LLVMSucc = BI->getSuccessor(0);
MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
- FastEmitBranch(MSucc, BI->getDebugLoc());
+ fastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
@@ -1152,7 +1518,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
case Instruction::Unreachable:
if (TM.Options.TrapUnreachable)
- return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
else
return true;
@@ -1165,38 +1531,39 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
return false;
case Instruction::Call:
- return SelectCall(I);
+ return selectCall(I);
case Instruction::BitCast:
- return SelectBitCast(I);
+ return selectBitCast(I);
case Instruction::FPToSI:
- return SelectCast(I, ISD::FP_TO_SINT);
+ return selectCast(I, ISD::FP_TO_SINT);
case Instruction::ZExt:
- return SelectCast(I, ISD::ZERO_EXTEND);
+ return selectCast(I, ISD::ZERO_EXTEND);
case Instruction::SExt:
- return SelectCast(I, ISD::SIGN_EXTEND);
+ return selectCast(I, ISD::SIGN_EXTEND);
case Instruction::Trunc:
- return SelectCast(I, ISD::TRUNCATE);
+ return selectCast(I, ISD::TRUNCATE);
case Instruction::SIToFP:
- return SelectCast(I, ISD::SINT_TO_FP);
+ return selectCast(I, ISD::SINT_TO_FP);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
if (DstVT.bitsGT(SrcVT))
- return SelectCast(I, ISD::ZERO_EXTEND);
+ return selectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
- return SelectCast(I, ISD::TRUNCATE);
+ return selectCast(I, ISD::TRUNCATE);
unsigned Reg = getRegForValue(I->getOperand(0));
- if (Reg == 0) return false;
- UpdateValueMap(I, Reg);
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
return true;
}
case Instruction::ExtractValue:
- return SelectExtractValue(I);
+ return selectExtractValue(I);
case Instruction::PHI:
llvm_unreachable("FastISel shouldn't visit PHI nodes!");
@@ -1207,83 +1574,72 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo)
- : FuncInfo(funcInfo),
- MF(funcInfo.MF),
- MRI(FuncInfo.MF->getRegInfo()),
- MFI(*FuncInfo.MF->getFrameInfo()),
- MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()),
- DL(*TM.getDataLayout()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()),
- LibInfo(libInfo) {
-}
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
FastISel::~FastISel() {}
-bool FastISel::FastLowerArguments() {
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
return false;
}
-unsigned FastISel::FastEmit_(MVT, MVT,
- unsigned) {
- return 0;
-}
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
-unsigned FastISel::FastEmit_r(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/) {
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- unsigned /*Op1*/, bool /*Op1IsKill*/) {
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
return 0;
}
-unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_f(MVT, MVT,
- unsigned, const ConstantFP * /*FPImm*/) {
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_ri(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rf(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
+unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/,
const ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rri(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- unsigned /*Op1*/, bool /*Op1IsKill*/,
- uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/, uint64_t /*Imm*/) {
return 0;
}
-/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// This method is a wrapper of fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and try
-/// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm, MVT ImmType) {
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
// If this is a multiply by a power of two, emit this as a shift left.
if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
Opcode = ISD::SHL;
@@ -1301,30 +1657,29 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
return 0;
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
- if (ResultReg != 0)
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
return ResultReg;
- unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
- if (MaterialReg == 0) {
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
// fast-isel, which would be very slow.
- IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
- VT.getSizeInBits());
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
- assert (MaterialReg != 0 && "Unable to materialize imm.");
- if (MaterialReg == 0) return 0;
+ if (!MaterialReg)
+ return 0;
}
- return FastEmit_rr(VT, VT, Opcode,
- Op0, Op0IsKill,
- MaterialReg, /*Kill=*/true);
+ return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg,
+ /*IsKill=*/true);
}
-unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
return MRI.createVirtualRegister(RC);
}
-unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II,
- unsigned Op, unsigned OpNum) {
+unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ unsigned OpNum) {
if (TargetRegisterInfo::isVirtualRegister(Op)) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
@@ -1340,8 +1695,8 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II,
return Op;
}
-unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
- const TargetRegisterClass* RC) {
+unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
@@ -1349,9 +1704,9 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill) {
+unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1359,10 +1714,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
@@ -1370,10 +1725,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill) {
+unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1382,23 +1737,23 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill) {
+unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, unsigned Op2,
+ bool Op2IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1408,48 +1763,46 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
- RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF);
- MRI.constrainRegClass(Op0, RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm1, uint64_t Imm2) {
+unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1457,24 +1810,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm1)
- .addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm1)
- .addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm) {
+unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1482,23 +1834,22 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addFPImm(FPImm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addFPImm(FPImm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addFPImm(FPImm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addFPImm(FPImm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1507,25 +1858,25 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm1, uint64_t Imm2) {
+ unsigned Op0, bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1534,28 +1885,30 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm1).addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm1).addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1564,41 +1917,41 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm1, uint64_t Imm2) {
+unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm1,
+ uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addImm(Imm1).addImm(Imm2);
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, bool Op0IsKill,
- uint32_t Idx) {
+unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+ bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
return ResultReg;
}
-/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
-/// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
- return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+/// Emit MachineInstrs to compute the value of Op with all but the least
+/// significant bit set to zero.
+unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1607,22 +1960,24 @@ unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
/// nodes as input. We cannot just directly add them, because expansion
/// might result in multiple MBB's for one BB. As such, the start of the
/// BB might correspond to a different MBB than the end.
-bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+ FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
- if (!isa<PHINode>(SuccBB->begin())) continue;
+ if (!isa<PHINode>(SuccBB->begin()))
+ continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
- if (!SuccsHandled.insert(SuccMBB)) continue;
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
@@ -1630,10 +1985,11 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ const auto *PN = dyn_cast<PHINode>(I); ++I) {
// Ignore dead phi's.
- if (PN->use_empty()) continue;
+ if (PN->use_empty())
+ continue;
// Only handle legal types. Two interesting things to note here. First,
// by bailing out early, we may leave behind some dead instructions,
@@ -1644,10 +2000,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
- if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
- VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
- else {
- FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
}
@@ -1657,12 +2011,12 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
DbgLoc = PN->getDebugLoc();
- if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
unsigned Reg = getRegForValue(PHIOp);
- if (Reg == 0) {
- FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ if (!Reg) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
@@ -1675,17 +2029,17 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
assert(LI->hasOneUse() &&
- "tryToFoldLoad expected a LoadInst with a single use");
+ "tryToFoldLoad expected a LoadInst with a single use");
// We know that the load has a single use, but don't know what it is. If it
// isn't one of the folded instructions, then we can't succeed here. Handle
// this by scanning the single-use users of the load until we get to FoldInst.
- unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
const Instruction *TheUser = LI->user_back();
- while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
// Stay in the right block.
TheUser->getParent() == FoldInst->getParent() &&
- --MaxUsers) { // Don't scan too far.
+ --MaxUsers) { // Don't scan too far.
// If there are multiple or no uses of this instruction, then bail out.
if (!TheUser->hasOneUse())
return false;
@@ -1707,7 +2061,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
// then there actually was no reference to it. Perhaps the load is referenced
// by a dead instruction.
unsigned LoadReg = getRegForValue(LI);
- if (LoadReg == 0)
+ if (!LoadReg)
return false;
// We can't fold if this vreg has no uses or more than one use. Multiple uses
@@ -1765,19 +2119,20 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
ValTy = SI->getValueOperand()->getType();
- } else {
+ } else
return nullptr;
- }
- bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
- bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
- const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
+ bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+ bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlignment(ValTy);
- unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
+ unsigned Size = DL.getTypeStoreSize(ValTy);
if (IsVolatile)
Flags |= MachineMemOperand::MOVolatile;
@@ -1787,5 +2142,45 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOInvariant;
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
- Alignment, TBAAInfo, Ranges);
+ Alignment, AAInfo, Ranges);
+}
+
+CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index ae124e8..86b9542 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -55,58 +56,71 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // For the users of the source value being used for compare instruction, if
+ // the number of signed predicate is greater than unsigned predicate, we
+ // prefer to use SIGN_EXTEND.
+ //
+ // With this optimization, we would be able to reduce some redundant sign or
+ // zero extension instruction, and eventually more machine CSE opportunities
+ // can be exposed.
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+ for (const User *U : V->users()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ NumOfSigned += CI->isSigned();
+ NumOfUnsigned += CI->isUnsigned();
+ }
+ }
+ if (NumOfSigned > NumOfUnsigned)
+ ExtendKind = ISD::SIGN_EXTEND;
+
+ return ExtendKind;
+}
+
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
- const TargetLowering *TLI = TM.getTargetLowering();
-
Fn = &fn;
MF = &mf;
+ TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
- Outs, Fn->getContext());
+ Fn->isVarArg(), Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
Function::const_iterator BB = Fn->begin(), EB = Fn->end();
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- // Don't fold inalloca allocas or other dynamic allocas into the initial
- // stack frame allocation, even if they are in the entry block.
- if (!AI->isStaticAlloca())
- continue;
-
- if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
- Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
- AI->getAlignment());
-
- TySize *= CUI->getZExtValue(); // Get total allocated size.
- if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
-
- StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
- }
- }
-
for (; BB != EB; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
- // Look for dynamic allocas.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- if (!AI->isStaticAlloca()) {
+ // Static allocas can be folded into the initial stack frame adjustment.
+ if (AI->isStaticAlloca()) {
+ const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+
+ } else {
unsigned Align = std::max(
(unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
AI->getAllocatedType()),
AI->getAlignment());
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF->getSubtarget().getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
@@ -126,9 +140,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -136,6 +150,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo()->setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic funciton, we need to ensure we
+ // forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ }
+
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -166,13 +195,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
StaticAllocaMap.find(AI);
if (SI != StaticAllocaMap.end()) { // Check for VLAs.
int FI = SI->second;
- MMI.setVariableDbgInfo(DI->getVariable(),
+ MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
FI, DI->getDebugLoc());
}
}
}
}
}
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[I] = getPreferredExtendForValue(I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
@@ -208,7 +240,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
PHIReg += NumRegisters;
@@ -241,12 +273,13 @@ void FunctionLoweringInfo::clear() {
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ PreferredExtendType.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
- return RegInfo->
- createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT));
+ return RegInfo->createVirtualRegister(
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -257,7 +290,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
/// will assign registers for each member or element.
///
unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, Ty, ValueVTs);
@@ -306,8 +339,6 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
if (!Ty->isIntegerTy() || Ty->isVectorTy())
return;
- const TargetLowering *TLI = TM.getTargetLowering();
-
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, Ty, ValueVTs);
assert(ValueVTs.size() == 1 &&
@@ -452,7 +483,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Gather all the type infos for this landing pad and pass them along to
// MachineModuleInfo.
- std::vector<const GlobalVariable *> TyInfo;
+ std::vector<const GlobalValue *> TyInfo;
unsigned N = I.getNumArgOperands();
for (unsigned i = N - 1; i > 1; --i) {
@@ -510,14 +541,14 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
Value *Val = I.getClause(i - 1);
if (I.isCatch(i - 1)) {
MMI.addCatchTypeInfo(MBB,
- dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
} else {
// Add filters in a list.
Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalVariable*, 4> FilterList;
+ SmallVector<const GlobalValue*, 4> FilterList;
for (User::op_iterator
II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
- FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
MMI.addFilterTypeInfo(MBB, FilterList);
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 7c124b8..a65f33e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -27,7 +27,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -265,12 +265,16 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MIB.addReg(VRBase, RegState::Define);
}
- SDValue Op(Node, i);
- if (IsClone)
- VRBaseMap.erase(Op);
- bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
+ // If this def corresponds to a result of the SDNode insert the VRBase into
+ // the lookup map.
+ if (i < NumResults) {
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
}
}
@@ -402,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = TM->getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type);
}
}
@@ -643,14 +647,18 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
uint64_t Offset = SD->getOffset();
- MDNode* MDPtr = SD->getMDPtr();
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(SD->getFrameIx())
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
@@ -696,7 +704,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addReg(0U, RegState::Debug);
}
- MIB.addMetadata(MDPtr);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
return &*MIB;
}
@@ -859,9 +868,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
-#ifdef NDEBUG
if (II.hasPostISelHook())
-#endif
TLI->AdjustInstrPostInstrSelection(MIB, Node);
}
@@ -1013,11 +1020,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// at the given position in the given block.
InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
MachineBasicBlock::iterator insertpos)
- : MF(mbb->getParent()),
- MRI(&MF->getRegInfo()),
- TM(&MF->getTarget()),
- TII(TM->getInstrInfo()),
- TRI(TM->getRegisterInfo()),
- TLI(TM->getTargetLowering()),
- MBB(mbb), InsertPos(insertpos) {
-}
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 920dda8..7b86f7d 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef INSTREMITTER_H
-#define INSTREMITTER_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -29,7 +29,6 @@ class SDDbgValue;
class InstrEmitter {
MachineFunction *MF;
MachineRegisterInfo *MRI;
- const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetLowering *TLI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c0e8c8c..5d17a5f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,8 +34,11 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "legalizedag"
+
//===----------------------------------------------------------------------===//
/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
@@ -48,16 +52,17 @@ using namespace llvm;
/// will attempt merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
+class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- /// LegalizePosition - The iterator for walking through the node list.
- SelectionDAG::allnodes_iterator LegalizePosition;
+ /// \brief The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
- /// LegalizedNodes - The set of nodes which have already been legalized.
- SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ /// \brief A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
@@ -66,14 +71,16 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
// Libcall insertion helpers.
public:
- explicit SelectionDAGLegalize(SelectionDAG &DAG);
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
- void LegalizeDAG();
-
-private:
- /// LegalizeOp - Legalizes the given operation.
+ /// \brief Legalizes the given operation.
void LegalizeOp(SDNode *Node);
+private:
SDValue OptimizeFloatStore(StoreSDNode *ST);
void LegalizeLoadOps(SDNode *Node);
@@ -145,37 +152,49 @@ private:
void ExpandNode(SDNode *Node);
void PromoteNode(SDNode *Node);
- void ForgetNode(SDNode *N) {
- LegalizedNodes.erase(N);
- if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
- ++LegalizePosition;
- }
-
public:
- // DAGUpdateListener implementation.
- void NodeDeleted(SDNode *N, SDNode *E) override {
- ForgetNode(N);
- }
- void NodeUpdated(SDNode *N) override {}
-
// Node replacement helpers
void ReplacedNode(SDNode *N) {
- if (N->use_empty()) {
- DAG.RemoveDeadNode(N);
- } else {
- ForgetNode(N);
- }
+ LegalizedNodes.erase(N);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(N);
}
void ReplaceNode(SDNode *Old, SDNode *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ assert(Old->getNumValues() == New->getNumValues() &&
+ "Replacing one node with another that produces a different number "
+ "of values!");
DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New);
ReplacedNode(Old);
}
void ReplaceNode(SDValue Old, SDValue New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
DAG.ReplaceAllUsesWith(Old, New);
+ DAG.TransferDbgValues(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
ReplacedNode(Old.getNode());
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+
DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ DEBUG(dbgs() << (i == 0 ? " with: "
+ : " and: ");
+ New[i]->dump(&DAG));
+ DAG.TransferDbgValues(SDValue(Old, i), New[i]);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New[i].getNode());
+ }
ReplacedNode(Old);
}
};
@@ -213,40 +232,6 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
- : SelectionDAG::DAGUpdateListener(dag),
- TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag) {
-}
-
-void SelectionDAGLegalize::LegalizeDAG() {
- DAG.AssignTopologicalOrder();
-
- // Visit all the nodes. We start in topological order, so that we see
- // nodes with their original operands intact. Legalization can produce
- // new nodes which may themselves need to be legalized. Iterate until all
- // nodes have been legalized.
- for (;;) {
- bool AnyLegalized = false;
- for (LegalizePosition = DAG.allnodes_end();
- LegalizePosition != DAG.allnodes_begin(); ) {
- --LegalizePosition;
-
- SDNode *N = LegalizePosition;
- if (LegalizedNodes.insert(N)) {
- AnyLegalized = true;
- LegalizeOp(N);
- }
- }
- if (!AnyLegalized)
- break;
-
- }
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
SDValue
@@ -270,7 +255,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
EVT OrigVT = VT;
EVT SVT = VT;
- while (SVT != MVT::f32) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
@@ -291,7 +276,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
+ VT, false, false, false, Alignment);
return Result;
}
SDValue Result =
@@ -377,7 +362,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
MachinePointerInfo(),
- MemVT, false, false, 0);
+ MemVT, false, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo()
@@ -385,7 +370,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset),
- ST->getTBAAInfo()));
+ ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
@@ -417,7 +402,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
- Alignment, ST->getTBAAInfo());
+ Alignment, ST->getAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -476,7 +461,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset),
- LD->getTBAAInfo());
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo(), false, false, 0));
@@ -494,8 +479,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset),
- LD->getTBAAInfo());
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -508,7 +494,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT, false, false, 0);
+ MachinePointerInfo(), LoadedVT, false,false, false,
+ 0);
// Callers expect a MERGE_VALUES node.
ValResult = Load;
@@ -538,25 +525,27 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(), Alignment,
+ LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
- LD->getTBAAInfo());
+ LD->isNonTemporal(),LD->isInvariant(),
+ MinAlign(Alignment, IncrementSize), LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(), Alignment,
+ LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
- LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(),
+ MinAlign(Alignment, IncrementSize), LD->getAAInfo());
}
// aggregate the two parts
@@ -659,7 +648,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
@@ -668,7 +657,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -677,7 +666,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -690,13 +679,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
- TBAAInfo);
+ AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -714,7 +703,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -731,10 +720,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
unsigned AS = ST->getAddressSpace();
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+ unsigned Align = ST->getAlignment();
+ if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
+ if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node),
DAG, TLI, this);
}
@@ -754,7 +744,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -777,7 +767,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
NVT, isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -799,7 +789,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
RoundVT,
isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -811,7 +801,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -821,7 +811,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -830,7 +820,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
}
// The order of the stores doesn't matter.
@@ -842,12 +832,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+ if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
+ if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
@@ -868,7 +859,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -893,13 +884,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
unsigned AS = LD->getAddressSpace();
+ unsigned Align = LD->getAlignment();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) {
+ if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
+ if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
}
}
@@ -928,6 +920,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(RVal.getNode() != Node && "Load must be completely replaced");
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(RVal.getNode());
+ UpdatedNodes->insert(RChain.getNode());
+ }
ReplacedNode(Node);
}
return;
@@ -938,7 +934,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ bool isInvariant = LD->isInvariant();
+ AAMDNodes AAInfo = LD->getAAInfo();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
// Some targets pretend to have an i1 loading operation, and actually
@@ -965,7 +962,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
SDValue Result =
DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ NVT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ AAInfo);
Ch = Result.getValue(1); // The chain.
@@ -1002,7 +1000,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1010,8 +1008,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ ExtraVT, isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1031,7 +1029,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1040,8 +1038,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ ExtraVT, isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1080,12 +1078,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// it, expand it.
EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
- if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) {
+ unsigned Align = LD->getAlignment();
+ if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
+ if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node),
DAG, TLI, Value, Chain);
}
@@ -1148,6 +1147,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(Value.getNode() != Node && "Load must be completely replaced");
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Value.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
ReplacedNode(Node);
}
}
@@ -1155,6 +1158,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+
if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return;
@@ -1186,6 +1191,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::FP_TO_FP16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -1334,10 +1340,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
if (NewNode != Node) {
- DAG.ReplaceAllUsesWith(Node, NewNode);
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
- ReplacedNode(Node);
+ ReplaceNode(Node, NewNode);
Node = NewNode;
}
switch (Action) {
@@ -1348,19 +1351,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// a complete mess.
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res.getNode()) {
- SmallVector<SDValue, 8> ResultVals;
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
- if (e == 1)
- ResultVals.push_back(Res);
- else
- ResultVals.push_back(Res.getValue(i));
- }
- if (Res.getNode() != Node || Res.getResNo() != 0) {
- DAG.ReplaceAllUsesWith(Node, ResultVals.data());
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
- ReplacedNode(Node);
+ if (!(Res.getNode() != Node || Res.getResNo() != 0))
+ return;
+
+ if (Node->getNumValues() == 1) {
+ // We can just directly replace this node with the lowered value.
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
}
+
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Res.getValue(i));
+ ReplaceNode(Node, ResultVals.data());
return;
}
}
@@ -1448,7 +1451,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
@@ -1483,7 +1486,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
StackPtr);
// Store the subvector.
- Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
MachinePointerInfo(), false, false, 0);
// Finally, load the updated vector.
@@ -1623,7 +1626,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
@@ -1797,7 +1801,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
- PtrInfo, SlotVT, false, false, DestAlign);
+ PtrInfo, SlotVT, false, false, false, DestAlign);
}
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1877,7 +1881,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
ShuffleVec.data());
else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
return false;
- NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices));
+ NewIntermedVals.push_back(
+ std::make_pair(Shuffle, std::move(FinalIndices)));
}
// If we had an odd number of defined values, then append the last
@@ -2580,7 +2585,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment);
+ MVT::f32, false, false, false, Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2782,7 +2787,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// x = x | (x >>32); // for 64-bit input
// return popcount(~x);
//
- // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ // Ref: "Hacker's Delight" by Henry Warren
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned len = VT.getSizeInBits();
@@ -2801,7 +2806,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
- // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ // Ref: "Hacker's Delight" by Henry Warren
EVT VT = Op.getValueType();
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
@@ -3153,65 +3158,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
- case ISD::FP_TO_SINT: {
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
-
- // FIXME: Only f32 to i64 conversions are supported.
- if (VT != MVT::f32 || NVT != MVT::i64)
- break;
-
- // Expand f32 -> i64 conversion
- // This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits());
- SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
- SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
- SDValue Bias = DAG.getConstant(127, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
- IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
- SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
-
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
-
- SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT)));
- SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
-
- SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT)));
- Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
-
- SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, IntVT));
-
- R = DAG.getZExtOrTrunc(R, dl, NVT);
-
-
- R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, TLI.getShiftAmountTy(IntVT))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, TLI.getShiftAmountTy(IntVT))),
- ISD::SETGT);
-
- SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
- Sign);
-
- Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
- DAG.getConstant(0, NVT), Ret, ISD::SETLT));
+ case ISD::FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
- }
case ISD::FP_TO_UINT: {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
@@ -3450,6 +3400,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
case ISD::FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
@@ -3568,12 +3528,38 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
- case ISD::FP16_TO_FP32:
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
break;
- case ISD::FP32_TO_FP16:
- Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ case ISD::FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ }
+
+ // We can extend to types bigger than f32 in two steps without changing the
+ // result. Since "f16 -> f32" is much more commonly available, give CodeGen
+ // the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
+ }
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
@@ -3584,12 +3570,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
- assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
- TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
- "Don't know how to expand this FP subtraction!");
- Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
- Results.push_back(Tmp1);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ }
break;
}
case ISD::SUB: {
@@ -3844,9 +3834,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
// Ret is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, delete the
- // node. The above EXTRACT_ELEMENT nodes should have been folded.
- DAG.DeleteNode(Ret.getNode());
+ // generally permitted during this phase of legalization, make sure the
+ // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
+ // folded.
+ assert(Ret->use_empty() &&
+ "Unexpected uses of illegally type from expanded lib call.");
}
if (isSigned) {
@@ -3907,7 +3899,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(), MemVT,
- false, false, 0);
+ false, false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
@@ -4217,6 +4209,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// use the new one.
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Tmp2.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
ReplacedNode(Node);
break;
}
@@ -4293,6 +4289,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
case ISD::FPOW: {
@@ -4323,7 +4322,55 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// SelectionDAG::Legalize - This is the entry point for the file.
//
void SelectionDAG::Legalize() {
- /// run - This is the main entry point to this class.
- ///
- SelectionDAGLegalize(*this).LegalizeDAG();
+ AssignTopologicalOrder();
+
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (auto NI = allnodes_end(); NI != allnodes_begin();) {
+ --NI;
+
+ SDNode *N = NI;
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ continue;
+ }
+
+ if (LegalizedNodes.insert(N).second) {
+ AnyLegalized = true;
+ Legalizer.LegalizeOp(N);
+
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ }
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ RemoveDeadNodes();
+}
+
+bool SelectionDAG::LegalizeOp(SDNode *N,
+ SmallSetVector<SDNode *, 16> &UpdatedNodes) {
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);
+
+ // Directly insert the node in question, and legalize it. This will recurse
+ // as needed through operands.
+ LegalizedNodes.insert(N);
+ Legalizer.LegalizeOp(N);
+
+ return LegalizedNodes.count(N);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6b8fec6..4591e79 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -68,6 +68,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
@@ -85,7 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
- case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
@@ -153,6 +155,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ NVT, Ops, 2, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ NVT, Ops, 2, false, SDLoc(N)).first;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
@@ -373,23 +401,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+ if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
+ SoftenFloatResult(Op.getNode(), 0);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
+ Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
// nodes?
-SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- SDLoc(N)).first;
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ false, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f16) {
+ // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
+ // storage-only type get a chance to select things.
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
@@ -498,6 +551,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::f16)
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -520,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getPointerInfo(), NVT, L->isVolatile(),
L->isNonTemporal(), false, L->getAlignment(),
- L->getTBAAInfo());
+ L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -533,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), false, L->getAlignment(),
- L->getTBAAInfo());
+ L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -625,10 +681,11 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
+ case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
- case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
@@ -654,11 +711,32 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
+ // If we get here, the result must be legal but the source illegal.
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (SVT == MVT::f16)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
+
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+}
+
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ // We actually deal with the partially-softened FP_TO_FP16 node too, which
+ // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+
EVT SVT = N->getOperand(0).getValueType();
EVT RVT = N->getValueType(0);
+ EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
- RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -704,13 +782,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
- EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -813,6 +884,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
@@ -876,6 +949,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
ISD::SETEQ);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index cffb0a1..b73bb0a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -99,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
- case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+ case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::AND:
case ISD::OR:
@@ -225,10 +225,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
N->getFailureOrdering(), N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- unsigned ChainOp = N->getNumValues() - 1;
- ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp));
+ // Update the use to N with the newly created Res.
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
+ ReplaceValueWith(SDValue(N, i), Res.getValue(i));
return Res;
}
@@ -402,7 +401,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
DAG.getValueType(N->getValueType(0).getScalarType()));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -827,7 +826,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
- case ISD::FP16_TO_FP32:
+ case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -863,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
- case ISD::SETNE:
+ case ISD::SETNE: {
+ SDValue OpL = GetPromotedInteger(NewLHS);
+ SDValue OpR = GetPromotedInteger(NewRHS);
+
+ // We would prefer to promote the comparison operand with sign extension,
+ // if we find the operand is actually to truncate an AssertSext. With this
+ // optimization, we can avoid inserting real truncate instruction, which
+ // is redudant eventually.
+ if (OpL->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
+ OpR->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+ NewLHS = OpL;
+ NewRHS = OpR;
+ } else {
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ }
+ break;
+ }
case ISD::SETUGE:
case ISD::SETUGT:
case ISD::SETULE:
@@ -947,7 +965,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
- "Legal vector of one illegal element?");
+ "Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
@@ -1861,7 +1879,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
bool isInvariant = N->isInvariant();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1870,7 +1888,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
- MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ MemVT, isVolatile, isNonTemporal, isInvariant,
+ Alignment, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1893,7 +1912,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1905,8 +1924,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1924,7 +1943,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1933,8 +1953,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2711,7 +2731,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2721,7 +2741,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
- Alignment, TBAAInfo);
+ Alignment, AAInfo);
}
if (TLI.isLittleEndian()) {
@@ -2729,7 +2749,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2742,7 +2762,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2770,7 +2790,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
- HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ HiVT, isVolatile, isNonTemporal, Alignment, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -2780,7 +2800,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2855,7 +2875,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
FudgePtr,
MachinePointerInfo::getConstantPool(),
MVT::f32,
- false, false, Alignment);
+ false, false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d0ca6f8..30f412b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SELECTIONDAG_LEGALIZETYPES_H
-#define SELECTIONDAG_LEGALIZETYPES_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -122,8 +122,8 @@ public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
}
/// run - This is the main entry point for the type legalizer. This does a
@@ -237,7 +237,7 @@ private:
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
- SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
@@ -387,6 +387,8 @@ private:
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FMINNUM(SDNode *N);
+ SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
@@ -403,7 +405,7 @@ private:
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
- SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
@@ -425,10 +427,10 @@ private:
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
- SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -450,6 +452,8 @@ private:
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 7e2f7b6..38829b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -256,13 +256,13 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -271,7 +271,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -470,7 +470,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
- const MDNode *TBAAInfo = St->getTBAAInfo();
+ AAMDNodes AAInfo = St->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -482,14 +482,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 507e7ff..b5af7b7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -199,12 +199,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
- Changed = true;
- return LegalizeOp(ExpandLoad(Op));
- }
+ case TargetLowering::Custom:
+ if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ Changed = true;
+ if (Lowered->getNumValues() != Op->getNumValues()) {
+ // This expanded to something other than the load. Assume the
+ // lowering code took care of any chain values, and just handle the
+ // returned value.
+ assert(Result.getValue(1).use_empty() &&
+ "There are still live users of the old chain!");
+ return LegalizeOp(Lowered);
+ } else {
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ }
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
@@ -273,6 +290,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -353,9 +372,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
}
- // The rest of the time, vector "promotion" is basically just bitcasting and
- // doing the operation in a different type. For example, x86 promotes
- // ISD::AND on v2i32 to v1i64.
+ // There are currently two cases of vector promotion:
+ // 1) Bitcasting a vector of integers to a different type to a vector of the
+ // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number oflarger
+ // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
@@ -365,14 +386,23 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ if (Op.getOperand(j)
+ .getValueType()
+ .getVectorElementType()
+ .isFloatingPoint())
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
-
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ if (VT.isFloatingPoint() ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint()))
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
+ else
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
@@ -480,7 +510,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment(),
- LD->getTBAAInfo());
+ LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
@@ -490,8 +520,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment(),
- LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment(), LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
@@ -561,8 +591,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Op.getNode()->getValueType(0).getScalarType(),
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment(), LD->getTBAAInfo());
+ LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment(), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
@@ -593,7 +623,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
unsigned NumElem = StVT.getVectorNumElements();
// The type of the data we want to save
@@ -621,7 +651,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 71240fc..27f63d2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -106,6 +106,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
@@ -223,7 +226,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->isInvariant(), N->getOriginalAlignment(),
- N->getTBAAInfo());
+ N->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -234,7 +237,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
EVT DestVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(0));
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+ // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+ // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
+ // legal and was not scalarized.
+ // See the similar logic in ScalarizeVecRes_VSETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ }
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
@@ -408,6 +427,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
@@ -451,11 +474,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
N->getValueType(0), Elt);
}
-/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
- "Unexected vector type!");
+ "Unexpected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
N->getValueType(0).getScalarType(), Elt);
@@ -509,12 +532,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment(), N->getTBAAInfo());
+ N->getOriginalAlignment(), N->getAAInfo());
}
/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
@@ -627,6 +650,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -868,6 +893,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
return;
}
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
// Spill the vector to the stack.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
@@ -923,14 +952,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment, TBAAInfo);
+ isInvariant, Alignment, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -938,7 +967,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1349,6 +1378,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
Idx.getValueType())), 0);
}
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
// Store the vector to the stack.
EVT EltVT = VecVT.getVectorElementType();
SDLoc dl(N);
@@ -1359,7 +1392,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo(), EltVT, false, false, 0);
+ MachinePointerInfo(), EltVT, false, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1374,7 +1407,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
bool isNT = N->isNonTemporal();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
@@ -1385,10 +1418,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- LoMemVT, isVol, isNT, Alignment, TBAAInfo);
+ LoMemVT, isVol, isNT, Alignment, AAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- isVol, isNT, Alignment, TBAAInfo);
+ isVol, isNT, Alignment, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1397,11 +1430,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVol, isNT, Alignment, TBAAInfo);
+ HiMemVT, isVol, isNT, Alignment, AAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- isVol, isNT, Alignment, TBAAInfo);
+ isVol, isNT, Alignment, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -1575,6 +1608,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
Res = WidenVecRes_Binary(N);
break;
@@ -2737,7 +2772,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2748,7 +2783,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Align,
- TBAAInfo);
+ AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2793,7 +2828,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
isNonTemporal, isInvariant, MinAlign(Align, Increment),
- TBAAInfo);
+ AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2809,7 +2844,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
isNonTemporal, isInvariant, MinAlign(Align, Increment),
- TBAAInfo);
+ AAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -2889,7 +2924,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ bool isInvariant = LD->isInvariant();
+ AAMDNodes AAInfo = LD->getAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2901,7 +2937,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
LD->getPointerInfo(),
- LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo);
+ LdEltVT, isVolatile, isNonTemporal, isInvariant,
+ Align, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2911,7 +2948,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- isVolatile, isNonTemporal, Align, TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant, Align,
+ AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2934,7 +2972,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2961,7 +2999,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
@@ -2981,7 +3019,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -3003,7 +3041,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -3027,7 +3065,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align,
- TBAAInfo));
+ AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -3038,7 +3076,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
}
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 624003f..db38b76 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -40,32 +41,29 @@ static cl::opt<signed> RegPressureThreshold(
"dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
cl::desc("Track reg pressure and switch priority to in-depth"));
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
+ : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TLI = IS->TLI;
+ TII = STI.getInstrInfo();
+ ResourcesModel = TII->CreateTargetScheduleState(STI);
+ // This hard requirement could be relaxed, but for now
+ // do not let it procede.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end();
+ I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
-ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
- Picker(this),
- InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData())
-{
- TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo();
- TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo();
- TLI = IS->getTargetLowering();
-
- const TargetMachine &tm = (*IS->MF).getTarget();
- ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr);
- // This hard requirement could be relaxed, but for now
- // do not let it procede.
- assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
-
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
-
- ParallelLiveRanges = 0;
- HorizontalVerticalBalance = 0;
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
}
unsigned
@@ -319,7 +317,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ee54292..bce69d7 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
-#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugLoc.h"
@@ -44,7 +44,8 @@ private:
const Value *Const; // valid for constants
unsigned FrameIx; // valid for stack objects
} u;
- MDNode *mdPtr;
+ MDNode *Var;
+ MDNode *Expr;
bool IsIndirect;
uint64_t Offset;
DebugLoc DL;
@@ -52,69 +53,72 @@ private:
bool Invalid;
public:
// Constructor for non-constants.
- SDDbgValue(MDNode *mdP, SDNode *N, unsigned R,
- bool indir, uint64_t off, DebugLoc dl,
- unsigned O) : mdPtr(mdP), IsIndirect(indir),
- Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
+ uint64_t off, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
}
// Constructor for constants.
- SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
- unsigned O) :
- mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
+ DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = CONST;
u.Const = C;
}
// Constructor for frame indices.
- SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
- mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
// Returns the kind.
- DbgValueKind getKind() { return kind; }
+ DbgValueKind getKind() const { return kind; }
- // Returns the MDNode pointer.
- MDNode *getMDPtr() { return mdPtr; }
+ // Returns the MDNode pointer for the variable.
+ MDNode *getVariable() const { return Var; }
+
+ // Returns the MDNode pointer for the expression.
+ MDNode *getExpression() const { return Expr; }
// Returns the SDNode* for a register ref
- SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+ SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
// Returns the ResNo for a register ref
- unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+ unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
// Returns the Value* for a constant
- const Value *getConst() { assert (kind==CONST); return u.Const; }
+ const Value *getConst() const { assert (kind==CONST); return u.Const; }
// Returns the FrameIx for a stack object
- unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+ unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
// Returns whether this is an indirect value.
- bool isIndirect() { return IsIndirect; }
+ bool isIndirect() const { return IsIndirect; }
// Returns the offset.
- uint64_t getOffset() { return Offset; }
+ uint64_t getOffset() const { return Offset; }
// Returns the DebugLoc.
- DebugLoc getDebugLoc() { return DL; }
+ DebugLoc getDebugLoc() const { return DL; }
// Returns the SDNodeOrder. This is the order of the preceding node in the
// input.
- unsigned getOrder() { return Order; }
+ unsigned getOrder() const { return Order; }
// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
// property. A SDDbgValue is invalid if the SDNode that produces the value is
// deleted.
void setIsInvalidated() { Invalid = true; }
- bool isInvalidated() { return Invalid; }
+ bool isInvalidated() const { return Invalid; }
};
} // end llvm namespace
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 4d8c2c7..61a3fd7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -221,7 +221,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue)
return nullptr;
else if (VT == MVT::Other)
@@ -229,7 +229,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (VT == MVT::Glue)
return nullptr;
}
@@ -431,17 +431,23 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
- break;
- ++NumRes;
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
}
- return N->getValueType(NumRes);
+ return N->getSimpleValueType(NumRes);
}
/// CheckForLiveRegDef - Return true and update live register vector if the
@@ -454,7 +460,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
bool Added = false;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
- if (RegAdded.insert(*AI)) {
+ if (RegAdded.insert(*AI).second) {
LRegs.push_back(*AI);
Added = true;
}
@@ -572,7 +578,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 13cfae7..8b54e656 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -30,8 +30,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -166,12 +166,11 @@ public:
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits, nullptr) {
- const TargetMachine &tm = mf.getTarget();
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
else
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
- tm.getSubtargetImpl(), this);
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
~ScheduleDAGRRList() {
@@ -946,7 +945,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue)
return nullptr;
else if (VT == MVT::Other)
@@ -954,7 +953,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (VT == MVT::Glue)
return nullptr;
}
@@ -1189,17 +1188,23 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
- break;
- ++NumRes;
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
}
- return N->getValueType(NumRes);
+ return N->getSimpleValueType(NumRes);
}
/// CheckForLiveRegDef - Return true and update live register vector if the
@@ -1218,7 +1223,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
if (LiveRegDefs[*AliasI] == SU) continue;
// Add Reg to the set of interfering live regs.
- if (RegAdded.insert(*AliasI)) {
+ if (RegAdded.insert(*AliasI).second) {
LRegs.push_back(*AliasI);
}
}
@@ -1235,7 +1240,7 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
if (!LiveRegDefs[i]) continue;
if (LiveRegDefs[i] == SU) continue;
if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
- if (RegAdded.insert(i))
+ if (RegAdded.insert(i).second)
LRegs.push_back(i);
}
}
@@ -1310,7 +1315,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
SDNode *Gen = LiveRegGens[CallResource]->getNode();
while (SDNode *Glued = Gen->getGluedNode())
Gen = Glued;
- if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ if (!IsChainDependent(Gen, Node, 0, TII) &&
+ RegAdded.insert(CallResource).second)
LRegs.push_back(CallResource);
}
}
@@ -1373,7 +1379,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
Interferences.push_back(CurSU);
}
else {
- assert(CurSU->isPending && "Intereferences are pending");
+ assert(CurSU->isPending && "Interferences are pending");
// Update the interference with current live regs.
LRegsPair.first->second = LRegs;
}
@@ -1439,7 +1445,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
@@ -1930,8 +1936,8 @@ void RegReductionPQBase::dumpRegPressure() const {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
}
#endif
}
@@ -2754,7 +2760,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!SUImpDefs && !SURegMask)
continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
@@ -2977,9 +2983,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
@@ -2991,9 +2997,9 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
@@ -3005,10 +3011,10 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- const TargetLowering *TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
@@ -3021,10 +3027,10 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- const TargetLowering *TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index de910b7..8b9f618 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -29,7 +29,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -38,17 +37,17 @@ using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
-// This allows latency based scheduler to notice high latency instructions
-// without a target itinerary. The choise if number here has more to do with
-// balancing scheduler heursitics than with the actual machine latency.
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
"sched-high-latency-cycles", cl::Hidden, cl::init(10),
cl::desc("Roughly estimate the number of cycles that 'long latency'"
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
- InstrItins(mf.getTarget().getInstrItineraryData()) {}
+ : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
+ InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -120,15 +119,20 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
return;
unsigned ResNo = User->getOperand(2).getResNo();
- if (Def->isMachineOpcode()) {
+ if (Def->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
+ PhysReg = Reg;
+ } else if (Def->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
- II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
PhysReg = Reg;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
- Cost = RC->getCopyCost();
- }
+ }
+
+ if (PhysReg != 0) {
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
+ Cost = RC->getCopyCost();
}
}
@@ -136,7 +140,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
SmallVectorImpl<EVT> &VTs,
SDValue ExtraOper = SDValue()) {
- SmallVector<SDValue, 4> Ops;
+ SmallVector<SDValue, 8> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
@@ -226,7 +230,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
I != E && UseCount < 100; ++I, ++UseCount) {
SDNode *User = *I;
- if (User == Node || !Visited.insert(User))
+ if (User == Node || !Visited.insert(User).second)
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
@@ -339,7 +343,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
- if (Visited.insert(NI->getOperand(i).getNode()))
+ if (Visited.insert(NI->getOperand(i).getNode()).second)
Worklist.push_back(NI->getOperand(i).getNode());
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
@@ -425,7 +429,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
}
void ScheduleDAGSDNodes::AddSchedEdges() {
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = forceUnitLatencies();
@@ -733,7 +737,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
SmallSet<unsigned, 8> &Seen) {
unsigned Order = N->getIROrder();
- if (!Order || !Seen.insert(Order)) {
+ if (!Order || !Seen.insert(Order).second) {
// Process any valid SDDbgValues even if node does not have any order
// assigned.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 39ebadf..2cd1f4b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SCHEDULEDAGSDNODES_H
-#define SCHEDULEDAGSDNODES_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 4589b0c..418b58e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -71,10 +72,8 @@ public:
AliasAnalysis *aa,
SchedulingPriorityQueue *availqueue)
: ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
-
- const TargetMachine &tm = mf.getTarget();
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
- tm.getSubtargetImpl(), this);
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
~ScheduleDAGVLIW() {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index daff1f2..7961e66 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -46,6 +46,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
@@ -95,7 +96,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BITCAST)
+ while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -143,7 +144,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BITCAST)
+ while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -686,6 +687,15 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
DeallocateNode(N);
}
+void SDDbgInfo::erase(const SDNode *Node) {
+ DbgValMapType::iterator I = DbgValMap.find(Node);
+ if (I == DbgValMap.end())
+ return;
+ for (auto &Val: I->second)
+ Val->setIsInvalidated();
+ DbgValMap.erase(I);
+}
+
void SelectionDAG::DeallocateNode(SDNode *N) {
if (N->OperandsNeedDelete)
delete[] N->OperandList;
@@ -696,10 +706,60 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
NodeAllocator.Deallocate(AllNodes.remove(N));
- // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
- ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
- for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
- DbgVals[i]->setIsInvalidated();
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+ // them and forget about that node.
+ DbgInfo->erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+#endif // NDEBUG
+
+/// \brief Insert a newly allocated node into the DAG.
+///
+/// Handles insertion into the all nodes list and CSE map, as well as
+/// verification and other common operations when a new node is allocated.
+void SelectionDAG::InsertNode(SDNode *N) {
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -839,83 +899,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
return Node;
}
-#ifndef NDEBUG
-/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
-static void VerifyNodeCommon(SDNode *N) {
- switch (N->getOpcode()) {
- default:
- break;
- case ISD::BUILD_PAIR: {
- EVT VT = N->getValueType(0);
- assert(N->getNumValues() == 1 && "Too many results!");
- assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
- "Wrong return type!");
- assert(N->getNumOperands() == 2 && "Wrong number of operands!");
- assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
- "Mismatched operand types!");
- assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
- "Wrong operand type!");
- assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
- "Wrong return type size");
- break;
- }
- case ISD::BUILD_VECTOR: {
- assert(N->getNumValues() == 1 && "Too many results!");
- assert(N->getValueType(0).isVector() && "Wrong return type!");
- assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
- "Wrong number of operands!");
- EVT EltVT = N->getValueType(0).getVectorElementType();
- for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
- assert((I->getValueType() == EltVT ||
- (EltVT.isInteger() && I->getValueType().isInteger() &&
- EltVT.bitsLE(I->getValueType()))) &&
- "Wrong operand type!");
- assert(I->getValueType() == N->getOperand(0).getValueType() &&
- "Operands must all have the same type");
- }
- break;
- }
- }
-}
-
-/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
-static void VerifySDNode(SDNode *N) {
- // The SDNode allocators cannot be used to allocate nodes with fields that are
- // not present in an SDNode!
- assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
- assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
- assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
- assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
- assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
- assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
- assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
- assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
- assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
- assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
- assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
- assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
- assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
- assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
- assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
- assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
- assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
- assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
- assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
-
- VerifyNodeCommon(N);
-}
-
-/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
-/// invalid.
-static void VerifyMachineNode(SDNode *N) {
- // The MachineNode allocators cannot be used to allocate nodes with fields
- // that are not present in a MachineNode!
- // Currently there are no such nodes.
-
- VerifyNodeCommon(N);
-}
-#endif // NDEBUG
-
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
@@ -924,22 +907,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty);
+ return TLI->getDataLayout()->getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(nullptr) {
+ : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(nullptr) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) {
+void SelectionDAG::init(MachineFunction &mf) {
MF = &mf;
- TLI = tli;
+ TLI = getSubtarget().getTargetLowering();
+ TSI = getSubtarget().getSelectionDAGInfo();
Context = &mf.getFunction()->getContext();
}
@@ -1108,8 +1092,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
EVT EltVT = VT.getScalarType();
const ConstantInt *Elt = &Val;
- const TargetLowering *TLI = TM.getTargetLowering();
-
// In some cases the vector type is legal but the element type is illegal and
// needs to be promoted, for example v8i8 on ARM. In this case, promote the
// inserted value (the type does not need to match the vector element type).
@@ -1185,7 +1167,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
if (!N) {
N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
}
SDValue Result(N, 0);
@@ -1198,7 +1180,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
}
SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
- return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget);
+ return getConstant(Val, TLI->getPointerTy(), isTarget);
}
@@ -1227,7 +1209,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
if (!N) {
N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
}
SDValue Result(N, 0);
@@ -1263,7 +1245,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
- const TargetLowering *TLI = TM.getTargetLowering();
// Truncate (with sign-extension) the offset value to the pointer size.
unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType());
@@ -1290,7 +1271,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
DL.getDebugLoc(), GV, VT,
Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1305,7 +1286,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1325,7 +1306,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1336,8 +1317,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment =
- TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1352,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1364,8 +1344,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment =
- TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1380,7 +1359,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1398,7 +1377,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1412,7 +1391,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1426,7 +1405,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
if (N) return SDValue(N, 0);
N = new (NodeAllocator) VTSDNode(VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1434,7 +1413,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1445,7 +1424,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
TargetFlags)];
if (N) return SDValue(N, 0);
N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1456,7 +1435,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
if (!CondCodeNodes[Cond]) {
CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
CondCodeNodes[Cond] = N;
- AllNodes.push_back(N);
+ InsertNode(N);
}
return SDValue(CondCodeNodes[Cond], 0);
@@ -1594,10 +1573,31 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
dl.getDebugLoc(), N1, N2,
MaskAlloc);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+ MVT VT = SV.getSimpleValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = SV.getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElems)
+ Idx += NumElems;
+ else
+ Idx -= NumElems;
+ }
+ MaskVec.push_back(Idx);
+ }
+
+ SDValue Op0 = SV.getOperand(0);
+ SDValue Op1 = SV.getOperand(1);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]);
+}
+
SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
SDValue Val, SDValue DTy,
SDValue STy, SDValue Rnd, SDValue Sat,
@@ -1619,7 +1619,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
dl.getDebugLoc(),
Ops, Code);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1633,7 +1633,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1647,7 +1647,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1663,7 +1663,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
dl.getDebugLoc(), Root, Label);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1686,7 +1686,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1704,7 +1704,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1720,7 +1720,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1741,7 +1741,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
dl.getDebugLoc(),
VT, Ptr, SrcAS, DestAS);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1749,7 +1749,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1762,7 +1762,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
- const TargetLowering *TLI = TM.getTargetLowering();
unsigned StackAlign =
std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
@@ -1777,7 +1776,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
VT2.getStoreSizeInBits())/8;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
- const TargetLowering *TLI = TM.getTargetLowering();
const DataLayout *TD = TLI->getDataLayout();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1796,7 +1794,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETFALSE2: return getConstant(0, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
- const TargetLowering *TLI = TM.getTargetLowering();
TargetLowering::BooleanContent Cnt =
TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
@@ -1885,7 +1882,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
// Ensure that the constant occurs on the RHS.
ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
MVT CompVT = N1.getValueType().getSimpleVT();
- if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
+ if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
return SDValue();
return getSetCC(dl, VT, N2, N1, SwappedCond);
@@ -1921,7 +1918,6 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// them in the KnownZero/KnownOne bitsets.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, unsigned Depth) const {
- const TargetLowering *TLI = TM.getTargetLowering();
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
@@ -2357,7 +2353,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
/// information. For example, immediately after an "SRA X, 2", we know that
/// the top 3 bits are all equal to each other, so we return 3.
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
- const TargetLowering *TLI = TM.getTargetLowering();
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarType().getSizeInBits();
@@ -2655,10 +2650,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
DL.getDebugLoc(), getVTList(VT));
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -2753,7 +2745,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::FP_TO_UINT: {
integerPart x[2];
bool ignored;
- assert(integerPartWidth >= 64);
+ static_assert(integerPartWidth >= 64, "APFloat parts too small!");
// FIXME need to be more flexible about rounding mode.
APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
Opcode==ISD::FP_TO_SINT,
@@ -2772,6 +2764,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
}
+ // Constant fold unary operations with a vector integer operand.
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+ if (BV->isConstant()) {
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ SmallVector<SDValue, 8> Ops;
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue OpN = BV->getOperand(i);
+ // Let the above scalar folding handle the conversion of each
+ // element.
+ OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
+ OpN);
+ Ops.push_back(OpN);
+ }
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ }
+ }
+ }
+ }
+
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
case ISD::TokenFactor:
@@ -2931,10 +2948,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
DL.getDebugLoc(), VTs, Operand);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3375,6 +3389,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Constant fold FP operations.
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions();
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
@@ -3388,28 +3403,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
switch (Opcode) {
case ISD::FADD:
s = V1.add(V2, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
+ if (!HasFPExceptions || s != APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp)
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FMUL:
s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp)
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FDIV:
s = V1.divide(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
return getConstantFP(V1, VT);
+ }
break;
case ISD::FREM :
s = V1.mod(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
return getConstantFP(V1, VT);
+ }
break;
case ISD::FCOPYSIGN:
V1.copySign(V2);
@@ -3526,10 +3545,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3633,10 +3649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
DL.getDebugLoc(), VTs, N1, N2, N3);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3802,7 +3815,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT == MVT::Other) {
unsigned AS = 0;
if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
- TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
+ TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) {
VT = TLI.getPointerTy();
} else {
switch (DstAlign & 7) {
@@ -3862,7 +3875,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned AS = 0;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
+ TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -3926,7 +3939,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
// Don't promote to an alignment that would require dynamic stack
// realignment.
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
@@ -3982,7 +3995,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, dl, DAG),
SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
- MinAlign(SrcAlign, SrcOff));
+ false, MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, dl, DAG),
DstPtrInfo.getWithOffset(DstOff), VT, isVol,
@@ -4202,9 +4215,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline,
- DstPtrInfo, SrcPtrInfo);
+ TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -4223,8 +4235,6 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// beyond the given memory regions. But fixing this isn't easy, and most
// people don't care.
- const TargetLowering *TLI = TM.getTargetLowering();
-
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4270,17 +4280,14 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstPtrInfo, SrcPtrInfo);
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
- const TargetLowering *TLI = TM.getTargetLowering();
-
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4325,31 +4332,22 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstPtrInfo);
+ SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
+ Size, Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
// Emit a library call.
- const TargetLowering *TLI = TM.getTargetLowering();
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
Args.push_back(Entry);
- // Extend or truncate the argument to be an i32 value for the call.
- if (Src.getValueType().bitsGT(MVT::i32))
- Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
- else
- Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
Entry.Node = Src;
- Entry.Ty = Type::getInt32Ty(*getContext());
- Entry.isSExt = true;
+ Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
Entry.Ty = IntPtrTy;
- Entry.isSExt = false;
Args.push_back(Entry);
// FIXME: pass in SDLoc
@@ -4396,7 +4394,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SuccessOrdering, FailureOrdering,
SynchScope);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4541,7 +4539,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem) {
+ bool ReadMem, bool WriteMem, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -4553,8 +4551,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
Flags |= MachineMemOperand::MOLoad;
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
+ if (!Size)
+ Size = MemVT.getStoreSize();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -4593,7 +4593,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
dl.getDebugLoc(), VTList, Ops,
MemVT, MMO);
}
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4637,7 +4637,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal, bool isInvariant,
- unsigned Alignment, const MDNode *TBAAInfo,
+ unsigned Alignment, const AAMDNodes &AAInfo,
const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
@@ -4660,7 +4660,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4709,7 +4709,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
dl.getDebugLoc(), VTs, AM, ExtType,
MemVT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4718,12 +4718,12 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
bool isInvariant, unsigned Alignment,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
}
SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
@@ -4738,11 +4738,12 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
- TBAAInfo);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant,
+ Alignment, AAInfo);
}
@@ -4769,7 +4770,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base,
SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ unsigned Alignment, const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4788,7 +4789,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags,
Val.getValueType().getStoreSize(), Alignment,
- TBAAInfo);
+ AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -4816,7 +4817,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
dl.getDebugLoc(), VTs,
ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4824,7 +4825,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
EVT SVT,bool isVolatile, bool isNonTemporal,
unsigned Alignment,
- const MDNode *TBAAInfo) {
+ const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4842,7 +4843,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
- TBAAInfo);
+ AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4885,7 +4886,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
dl.getDebugLoc(), VTs,
ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4912,7 +4913,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
ST->getMemoryVT(),
ST->getMemOperand());
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4991,10 +4992,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
VTs, Ops);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -5074,15 +5072,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
VTList, Ops);
}
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) {
- return getNode(Opcode, DL, VTList, ArrayRef<SDValue>());
+ return getNode(Opcode, DL, VTList, None);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
@@ -5464,6 +5459,10 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
/// node, and because it doesn't require CSE recalculation for any of
/// the node's users.
///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence it isn't appropriate to use from within the DAG combiner or
+/// the legalizer which maintain worklists that would need to be updated when
+/// deleting things.
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
unsigned NumOps = Ops.size();
@@ -5530,10 +5529,9 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// new operands.
if (!DeadNodeSet.empty()) {
SmallVector<SDNode *, 16> DeadNodes;
- for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
- E = DeadNodeSet.end(); I != E; ++I)
- if ((*I)->use_empty())
- DeadNodes.push_back(*I);
+ for (SDNode *N : DeadNodeSet)
+ if (N->use_empty())
+ DeadNodes.push_back(N);
RemoveDeadNodes(DeadNodes);
}
@@ -5702,10 +5700,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
if (DoCSE)
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifyMachineNode(N);
-#endif
+ InsertNode(N);
return N;
}
@@ -5751,26 +5746,24 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
-SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R,
- bool IsIndirect, uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O);
+SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
+ unsigned R, bool IsIndirect, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
/// Constant
-SDDbgValue *
-SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C,
- uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
+ const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O);
}
/// FrameIndex
-SDDbgValue *
-SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
+ unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
namespace {
@@ -6159,9 +6152,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
- DbgInfo->add(DB, SD, isParameter);
- if (SD)
+ if (SD) {
+ assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
SD->setHasDebugValue(true);
+ }
+ DbgInfo->add(DB, SD, isParameter);
}
/// TransferDbgValues - Transfer SDDbgValues.
@@ -6176,10 +6171,10 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
I != E; ++I) {
SDDbgValue *Dbg = *I;
if (Dbg->getKind() == SDDbgValue::SDNODE) {
- SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
- Dbg->isIndirect(),
- Dbg->getOffset(), Dbg->getDebugLoc(),
- Dbg->getOrder());
+ SDDbgValue *Clone =
+ getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
+ To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
}
}
@@ -6217,7 +6212,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
- assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+ // We check here that the size of the memory operand fits within the size of
+ // the MMO. This is because the MMO might indicate only a possible address
+ // range instead of specifying the affected memory addresses precisely.
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
@@ -6227,7 +6225,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
- assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
/// Profile - Gather unique data for the node.
@@ -6371,7 +6369,7 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
bool
SDNode::hasPredecessorHelper(const SDNode *N,
- SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallPtrSetImpl<const SDNode *> &Visited,
SmallVectorImpl<const SDNode *> &Worklist) const {
if (Visited.empty()) {
Worklist.push_back(this);
@@ -6387,7 +6385,7 @@ SDNode::hasPredecessorHelper(const SDNode *N,
const SDNode *M = Worklist.pop_back_val();
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
SDNode *Op = M->getOperand(i).getNode();
- if (Visited.insert(Op))
+ if (Visited.insert(Op).second)
Worklist.push_back(Op);
if (Op == N)
return true;
@@ -6427,7 +6425,6 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
- const TargetLowering *TLI = TM.getTargetLowering();
EVT OperandEltVT = OperandVT.getVectorElementType();
Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
@@ -6507,7 +6504,6 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
- const TargetLowering *TLI = TM.getTargetLowering();
bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
@@ -6522,7 +6518,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV;
int64_t GVOffset = 0;
- const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
@@ -6749,8 +6744,8 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
- SmallPtrSet<const SDNode*, 32> &Visited,
- SmallPtrSet<const SDNode*, 32> &Checked,
+ SmallPtrSetImpl<const SDNode*> &Visited,
+ SmallPtrSetImpl<const SDNode*> &Checked,
const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
@@ -6758,7 +6753,7 @@ static void checkForCyclesHelper(const SDNode *N,
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
- if (!Visited.insert(N)) {
+ if (!Visited.insert(N).second) {
errs() << "Detected cycle in SelectionDAG\n";
dbgs() << "Offending node:\n";
N->dumprFull(DAG); dbgs() << "\n";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 28d8e98..8f582f1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -58,6 +58,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -645,8 +646,10 @@ namespace {
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
- void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
- SDValue &Chain, SDValue *Flag, const Value *V) const;
+ void
+ getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+ SDValue *Flag, const Value *V,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -761,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
- SDValue &Chain, SDValue *Flag,
- const Value *V) const {
+ SDValue &Chain, SDValue *Flag, const Value *V,
+ ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the values's legal parts.
unsigned NumRegs = Regs.size();
@@ -772,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
EVT ValueVT = ValueVTs[Value];
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
MVT RegisterVT = RegVTs[Value];
- ISD::NodeType ExtendKind =
- TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
+
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+ ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT, V, ExtendKind);
@@ -860,7 +865,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getTarget().getDataLayout();
+ DL = DAG.getSubtarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -988,15 +993,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DebugLoc dl = DDI.getdl();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
MDNode *Variable = DI->getVariable();
+ MDNode *Expr = DI->getExpression();
uint64_t Offset = DI->getOffset();
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) {
- SDV = DAG.getDbgValue(Variable, Val.getNode(),
- Val.getResNo(), IsIndirect,
- Offset, dl, DbgSDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect,
+ Val)) {
+ SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
+ IsIndirect, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1018,8 +1024,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(),
- InReg, V->getType());
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
+ V->getType());
SDValue Chain = DAG.getEntryNode();
N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
resolveDanglingDebugInfo(V, N);
@@ -1050,10 +1056,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
- EVT VT = TLI->getValueType(V->getType(), true);
+ EVT VT = TLI.getValueType(V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, VT);
@@ -1063,7 +1069,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, TLI->getPointerTy(AS));
+ return DAG.getConstant(0, TLI.getPointerTy(AS));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
@@ -1117,7 +1123,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, C->getType(), ValueVTs);
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
@@ -1149,7 +1155,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
- EVT EltVT = TLI->getValueType(VecTy->getElementType());
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
@@ -1169,13 +1175,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
- return DAG.getFrameIndex(SI->second, TLI->getPointerTy());
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
- RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType());
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1184,7 +1190,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
@@ -1197,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
- ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()),
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
@@ -1205,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -1224,7 +1230,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs);
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
@@ -1242,10 +1248,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ExtendKind = ISD::ZERO_EXTEND;
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
- unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT);
- MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -1275,9 +1281,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction()->getCallingConv();
- Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg,
- Outs, OutVals, getCurSDLoc(),
- DAG);
+ Chain = DAG.getTargetLoweringInfo().LowerReturn(
+ Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -1601,10 +1606,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!TM.getTargetLowering()->isJumpExpensive() &&
- BOp->hasOneUse() &&
- (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
+ BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
getEdgeWeight(BrMBB, Succ1MBB));
@@ -1724,7 +1728,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
- EVT PTy = TM.getTargetLowering()->getPointerTy();
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
@@ -1752,10 +1756,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
- const TargetLowering *TLI = TM.getTargetLowering();
- SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy());
- unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy());
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1763,12 +1767,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the largest
// case in the switch.
- SDValue CMP = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub,
- DAG.getConstant(JTH.Last - JTH.First,VT),
- ISD::SETUGT);
+ SDValue CMP =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1799,8 +1801,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT PtrTy = TLI->getPointerTy();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy();
MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI->getStackProtectorIndex();
@@ -1810,10 +1812,22 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
unsigned Align =
- TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
- SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
- GuardPtr, MachinePointerInfo(IRGuard, 0),
- true, false, false, Align);
+ TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
+
+ SDValue Guard;
+
+ // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
+ // guard value from the virtual register holding the value. Otherwise, emit a
+ // volatile load to retrieve the stack guard value.
+ unsigned GuardReg = SPD.getGuardReg();
+
+ if (GuardReg && TLI.useLoadStackGuardNode())
+ Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg,
+ PtrTy);
+ else
+ Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ GuardPtr, MachinePointerInfo(IRGuard, 0),
+ true, false, false, Align);
SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
StackSlotPtr,
@@ -1824,11 +1838,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
EVT VT = Guard.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);
- SDValue Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(0, VT),
- ISD::SETNE);
+ SDValue Cmp =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, VT), ISD::SETNE);
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
@@ -1853,10 +1866,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
- const TargetLowering *TLI = TM.getTargetLowering();
- SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
- MVT::isVoid, nullptr, 0, false,
- getCurSDLoc(), false, false).second;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain =
+ TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+ nullptr, 0, false, getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1871,16 +1884,15 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
DAG.getConstant(B.First, VT));
// Check range
- const TargetLowering *TLI = TM.getTargetLowering();
- SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue RangeCmp =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, VT),
- ISD::SETUGT);
+ Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT);
// Determine the type of the test operands.
bool UsePtrType = false;
- if (!TLI->isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT))
UsePtrType = true;
else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
@@ -1892,7 +1904,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
}
if (UsePtrType) {
- VT = TLI->getPointerTy();
+ VT = TLI.getPointerTy();
Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT);
}
@@ -1936,22 +1948,18 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
Reg, VT);
SDValue Cmp;
unsigned PopCount = CountPopulation_64(B.Mask);
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
- Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- ShiftOp,
- DAG.getConstant(countTrailingZeros(B.Mask), VT),
- ISD::SETEQ);
+ Cmp = DAG.getSetCC(
+ getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
- Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- ShiftOp,
- DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
- ISD::SETNE);
+ Cmp = DAG.getSetCC(
+ getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT,
@@ -1961,9 +1969,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(),
VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- AndOp, DAG.getConstant(0, VT),
- ISD::SETNE);
+ TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp,
+ DAG.getConstant(0, VT), ISD::SETNE);
}
// The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
@@ -2001,8 +2008,17 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
if (isa<InlineAsm>(Callee))
visitInlineAsm(&I);
else if (Fn && Fn->isIntrinsic()) {
- assert(Fn->getIntrinsicID() == Intrinsic::donothing);
- // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ switch (Fn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Cannot invoke this intrinsic");
+ case Intrinsic::donothing:
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(&I, LandingPad);
+ break;
+ }
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
@@ -2034,26 +2050,26 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getExceptionPointerRegister() == 0 &&
- TLI->getExceptionSelectorRegister() == 0)
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
return;
SmallVector<EVT, 2> ValueVTs;
- ComputeValueVTs(*TLI, LP.getType(), ValueVTs);
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
Ops[0] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()),
- getCurSDLoc(), ValueVTs[0]);
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[0]);
Ops[1] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()),
- getCurSDLoc(), ValueVTs[1]);
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
@@ -2218,9 +2234,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return TLI.supportJumpTables() &&
- (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+ return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
@@ -2245,8 +2260,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries()))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2327,7 +2342,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
}
// Create a jump table index for this jump table.
- unsigned JTEncoding = TLI->getJumpTableEncoding();
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
->createJumpTableIndex(DestBBs);
@@ -2347,7 +2362,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
- MachineBasicBlock* Default,
MachineBasicBlock* SwitchBB) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
@@ -2413,8 +2427,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
RSize -= J->size();
}
- const TargetLowering *TLI = TM.getTargetLowering();
- if (areJTsAllowed(*TLI)) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (areJTsAllowed(TLI)) {
// If our case is dense we *really* should handle it earlier!
assert((FMetric > 0) && "Should handle dense range earlier!");
} else {
@@ -2484,8 +2498,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock* SwitchBB) {
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT PTy = TLI->getPointerTy();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PTy = TLI.getPointerTy();
unsigned IntPtrBits = PTy.getSizeInBits();
Case& FrontCase = *CR.Range.first;
@@ -2496,7 +2510,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// If target does not have legal shift left, do not emit bit tests at all.
- if (!TLI->isOperationLegal(ISD::SHL, PTy))
+ if (!TLI.isOperationLegal(ISD::SHL, PTy))
return false;
size_t numCmps = 0;
@@ -2601,21 +2615,19 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
BitTestBlock BTB(lowBound, cmpRange, SV,
-1U, MVT::Other, (CR.CaseBB == SwitchBB),
- CR.CaseBB, Default, BTC);
+ CR.CaseBB, Default, std::move(BTC));
if (CR.CaseBB == SwitchBB)
visitBitTestHeader(BTB, SwitchBB);
- BitTestCases.push_back(BTB);
+ BitTestCases.push_back(std::move(BTB));
return true;
}
/// Clusterify - Transform simple list of Cases into list of CaseRange's
-size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
- size_t numCmps = 0;
-
+void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
@@ -2653,13 +2665,15 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
}
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
- }
+ DEBUG({
+ size_t numCmps = 0;
+ for (auto &I : Cases)
+ // A range counts double, since it requires two compares.
+ numCmps += I.Low != I.High ? 2 : 1;
- return numCmps;
+ dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n';
+ });
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
@@ -2701,10 +2715,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// representing each one, and sort the vector so that we can efficiently
// create a binary search tree from them.
CaseVector Cases;
- size_t numCmps = Clusterify(Cases, SI);
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << '\n');
- (void)numCmps;
+ Clusterify(Cases, SI);
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
@@ -2738,7 +2749,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Emit binary tree. We need to pick a pivot, and push left and right ranges
// onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
- handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
}
}
@@ -2749,7 +2760,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
- bool Inserted = Done.insert(BB);
+ bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
@@ -2806,7 +2817,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy =
+ DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2861,8 +2873,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
!isa<ConstantSDNode>(Op1) &&
isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
- setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2,
- getCurSDLoc(), DAG));
+ setValue(&I, DAG.getTargetLoweringInfo()
+ .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG));
else
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(),
Op1, Op2));
@@ -2878,7 +2890,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
@@ -2893,13 +2905,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
ISD::CondCode Condition = getFCmpCondCode(predicate);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
@@ -2926,7 +2938,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
@@ -2934,7 +2946,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
@@ -2942,52 +2954,51 @@ void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT DestVT = TLI->getValueType(I.getType());
- setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(),
- DestVT, N,
- DAG.getTargetConstant(0, TLI->getPointerTy())));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
@@ -2995,7 +3006,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
@@ -3003,13 +3014,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
@@ -3031,7 +3042,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
@@ -3049,8 +3060,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)),
getCurSDLoc(), TLI.getVectorIdxTy());
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
- TM.getTargetLowering()->getValueType(I.getType()),
- InVec, InVal, InIdx));
+ TLI.getValueType(I.getType()), InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
@@ -3059,8 +3069,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)),
getCurSDLoc(), TLI.getVectorIdxTy());
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
- TM.getTargetLowering()->getValueType(I.getType()),
- InVec, InIdx));
+ TLI.getValueType(I.getType()), InVec, InIdx));
}
// Utility for visitShuffleVector - Return true if every element in Mask,
@@ -3082,8 +3091,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
unsigned MaskNumElts = Mask.size();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -3202,9 +3211,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
else
- Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT,
- Src, DAG.getConstant(StartIdx[Input],
- TLI->getVectorIdxTy()));
+ Src = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src,
+ DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy()));
}
// Calculate new mask.
@@ -3230,7 +3239,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// replacing the shuffle with extract and build vector.
// to insert and build vector.
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = TLI->getVectorIdxTy();
+ EVT IdxVT = TLI.getVectorIdxTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
@@ -3262,16 +3271,22 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
- ComputeValueVTs(*TLI, AggTy, AggValueVTs);
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(*TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
+ // Ignore an insertvalue that produces an empty object
+ if (!NumAggValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
@@ -3302,9 +3317,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(*TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
@@ -3353,13 +3368,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
- EVT PTy = TLI->getPointerTy(AS);
+ EVT PTy = TLI.getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
@@ -3373,8 +3388,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
// N = N + Idx * ElementSize;
- APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
- DL->getTypeAllocSize(Ty));
+ APInt ElementSize =
+ APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -3411,15 +3426,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
return; // getValue will auto-populate this.
Type *Ty = I.getAllocatedType();
- const TargetLowering *TLI = TM.getTargetLowering();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
- I.getAlignment());
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI->getPointerTy();
+ EVT IntPtr = TLI.getPointerTy();
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr);
@@ -3430,7 +3445,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -3464,15 +3480,18 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
- bool isInvariant = I.getMetadata("invariant.load") != nullptr;
+ bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
unsigned Alignment = I.getAlignment();
- const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3483,7 +3502,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
- AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3492,9 +3511,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getRoot();
}
- const TargetLowering *TLI = TM.getTargetLowering();
if (isVolatile)
- Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
@@ -3520,7 +3538,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ isNonTemporal, isInvariant, Alignment, AAInfo,
Ranges);
Values[i] = L;
@@ -3549,7 +3567,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(),
+ ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3565,9 +3584,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
+ bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
unsigned Alignment = I.getAlignment();
- const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -3583,7 +3604,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue St = DAG.getStore(Root, getCurSDLoc(),
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
Chains[ChainI] = St;
}
@@ -3592,30 +3613,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
-static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
- SynchronizationScope Scope,
- bool Before, SDLoc dl,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- // Fence, if necessary
- if (Before) {
- if (Order == AcquireRelease || Order == SequentiallyConsistent)
- Order = Release;
- else if (Order == Acquire || Order == Monotonic || Order == Unordered)
- return Chain;
- } else {
- if (Order == AcquireRelease)
- Order = Acquire;
- else if (Order == Release || Order == Monotonic || Order == Unordered)
- return Chain;
- }
- SDValue Ops[3];
- Ops[0] = Chain;
- Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
- Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
- return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
-}
-
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -3624,27 +3621,16 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
- DAG, *TLI);
-
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
SDValue L = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
- TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
+ /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);
SDValue OutChain = L.getValue(2);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
@@ -3671,38 +3657,28 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
- DAG, *TLI);
-
SDValue L =
DAG.getAtomic(NT, dl,
getValue(I.getValOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
- I.getPointerOperand(), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
+ I.getPointerOperand(),
+ /* Alignment=*/ 0, Order, Scope);
SDValue OutChain = L.getValue(1);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
- Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -3713,8 +3689,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3728,19 +3704,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlignment() ? I.getAlignment() :
DAG.getEVTAlignment(VT));
- InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+ InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
getValue(I.getPointerOperand()), MMO,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
+ Order, Scope);
SDValue OutChain = L.getValue(1);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
@@ -3753,28 +3724,19 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getValueOperand()->getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
- DAG, *TLI);
-
SDValue OutChain =
DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValueOperand()),
I.getPointerOperand(), I.getAlignment(),
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
-
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
+ Order, Scope);
DAG.setRoot(OutChain);
}
@@ -3799,13 +3761,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Info is set by getTgtMemInstrinsic
TargetLowering::IntrinsicInfo Info;
- const TargetLowering *TLI = TM.getTargetLowering();
- bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
@@ -3814,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I.getType(), ValueVTs);
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3829,7 +3791,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
- Info.readMem, Info.writeMem);
+ Info.readMem, Info.writeMem, Info.size);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -3848,7 +3810,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
- EVT VT = TLI->getValueType(PTy);
+ EVT VT = TLI.getValueType(PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
}
@@ -4555,16 +4517,17 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
-bool
-SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset, bool IsIndirect,
- const SDValue &N) {
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
+ MDNode *Variable,
+ MDNode *Expr, int64_t Offset,
+ bool IsIndirect,
+ const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
- const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Ignore inlined function arguments here.
DIVariable DV(Variable);
@@ -4610,14 +4573,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
return false;
if (Op->isReg())
- FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- Op->getReg(), Offset, Variable));
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, Op->getReg(), Offset, Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op).addImm(Offset).addMetadata(Variable));
+ BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
return true;
}
@@ -4635,7 +4600,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
/// otherwise lower it and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
@@ -4649,17 +4614,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::vaend: visitVAEnd(I); return nullptr;
case Intrinsic::vacopy: visitVACopy(I); return nullptr;
case Intrinsic::returnaddress:
- setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::frameaddress:
- setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
- EVT VT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
return nullptr;
}
@@ -4673,9 +4638,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::setjmp:
- return &"_setjmp"[!TLI->usesUnderscoreSetJmp()];
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return &"_longjmp"[!TLI->usesUnderscoreLongJmp()];
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4736,6 +4701,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
+ MDNode *Expression = DI.getExpression();
const Value *Address = DI.getAddress();
DIVariable DIVar(Variable);
assert((!DIVar || DIVar.isVariable()) &&
@@ -4771,16 +4737,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (FINode)
// Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(),
- 0, dl, SDNodeOrder);
+ SDV = DAG.getFrameIndexDbgValue(
+ Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
else {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, 0, false, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N);
return nullptr;
}
} else if (AI)
- SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
@@ -4793,7 +4759,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) {
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false,
+ N)) {
// If variable is pinned by a alloca in dominating bb then
// use StaticAllocaMap.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
@@ -4801,7 +4768,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- SDV = DAG.getFrameIndexDbgValue(Variable, SI->second,
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second,
0, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
@@ -4822,6 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
MDNode *Variable = DI.getVariable();
+ MDNode *Expression = DI.getExpression();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
@@ -4829,7 +4797,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
+ SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
} else {
// Do not use getValue() in here; we don't want to generate code at
@@ -4841,10 +4810,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) {
- SDV = DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), IsIndirect,
- Offset, dl, SDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset,
+ IsIndirect, N)) {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ IsIndirect, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4878,7 +4847,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
- GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, MVT::i32);
setValue(&I, Res);
@@ -4899,15 +4868,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
- TLI->getPointerTy());
+ TLI.getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, sdl,
CfaArg.getValueType(),
DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
CfaArg.getValueType()),
CfaArg);
- SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
- TLI->getPointerTy(),
- DAG.getConstant(0, TLI->getPointerTy()));
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return nullptr;
@@ -4997,7 +4965,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
DAG.getConstant(NewIntrinsic, MVT::i32),
@@ -5009,14 +4977,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vinsertf128_ps_256:
case Intrinsic::x86_avx_vinsertf128_si_256:
case Intrinsic::x86_avx2_vinserti128: {
- EVT DestVT = TLI->getValueType(I.getType());
- EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType());
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
ElVT.getVectorNumElements();
- Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, TLI->getVectorIdxTy()));
+ Res =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
@@ -5024,12 +4992,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vextractf128_ps_256:
case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
getValue(I.getArgOperand(0)),
- DAG.getConstant(Idx, TLI->getVectorIdxTy()));
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
@@ -5055,7 +5023,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertus: Code = ISD::CVT_US; break;
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
const Value *Op1 = I.getArgOperand(0);
Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
DAG.getValueType(DestVT),
@@ -5071,23 +5039,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)), DAG));
return nullptr;
case Intrinsic::log:
- setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::log2:
- setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::log10:
- setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::exp:
- setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::exp2:
- setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)), DAG, *TLI));
+ getValue(I.getArgOperand(1)), DAG, TLI));
return nullptr;
case Intrinsic::sqrt:
case Intrinsic::fabs:
@@ -5119,6 +5087,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5133,9 +5113,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2))));
return nullptr;
case Intrinsic::fmuladd: {
- EVT VT = TLI->getValueType(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI->isFMAFasterThanFMulAndFAdd(VT)) {
+ TLI.isFMAFasterThanFMulAndFAdd(VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -5155,12 +5135,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::convert_to_fp16:
- setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
- MVT::i16, getValue(I.getArgOperand(0))));
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, MVT::i32))));
return nullptr;
case Intrinsic::convert_from_fp16:
- setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
- MVT::f32, getValue(I.getArgOperand(0))));
+ setValue(&I,
+ DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -5205,7 +5189,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op);
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return nullptr;
@@ -5219,9 +5203,44 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT PtrTy = TLI->getPointerTy();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue Src, Chain = getRoot();
+ const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
+
+ // See if Ptr is a bitcast. If it is, look through it and see if we can get
+ // global variable __stack_chk_guard.
+ if (!GV)
+ if (const Operator *BC = dyn_cast<Operator>(Ptr))
+ if (BC->getOpcode() == Instruction::BitCast)
+ GV = dyn_cast<GlobalVariable>(BC->getOperand(0));
+
+ if (GV && TLI.useLoadStackGuardNode()) {
+ // Emit a LOAD_STACK_GUARD node.
+ MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD,
+ sdl, PtrTy, Chain);
+ MachinePointerInfo MPInfo(GV);
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+ unsigned Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant;
+ *MemRefs = MF.getMachineMemOperand(MPInfo, Flags,
+ PtrTy.getSizeInBits() / 8,
+ DAG.getEVTAlignment(PtrTy));
+ Node->setMemRefs(MemRefs, MemRefs + 1);
+
+ // Copy the guard value to a virtual register so that it can be
+ // retrieved in the epilogue.
+ Src = SDValue(Node, 0);
+ const TargetRegisterClass *RC =
+ TLI.getRegClassFor(Src.getSimpleValueType());
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+
+ SPDescriptor.setGuardReg(Reg);
+ Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src);
+ } else {
+ Src = getValue(I.getArgOperand(0)); // The guard's value.
+ }
- SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
@@ -5230,7 +5249,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), sdl, Src, FIN,
+ Res = DAG.getStore(Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(FI),
true, false, 0);
setValue(&I, Res);
@@ -5259,8 +5278,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
+ case Intrinsic::assume:
case Intrinsic::var_annotation:
- // Discard annotate attributes
+ // Discard annotate attributes and assumptions
return nullptr;
case Intrinsic::init_trampoline: {
@@ -5281,7 +5301,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::adjust_trampoline: {
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
- TLI->getPointerTy(),
+ TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
}
@@ -5321,10 +5341,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot())
.setCallee(CallingConv::C, I.getType(),
- DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
std::move(Args), 0);
- std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return nullptr;
}
@@ -5388,11 +5408,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!LifetimeObject)
continue;
- int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+ // First check that the Alloca is static, otherwise it won't have a
+ // valid frame index.
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return nullptr;
+
+ int FI = SI->second;
SDValue Ops[2];
Ops[0] = getRoot();
- Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true);
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@@ -5402,7 +5428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::invariant_start:
// Discard region information.
- setValue(&I, DAG.getUNDEF(TLI->getPointerTy()));
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
return nullptr;
case Intrinsic::invariant_end:
// Discard region information.
@@ -5420,7 +5446,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::clear_cache:
- return TLI->getClearCacheBuiltinName();
+ return TLI.getClearCacheBuiltinName();
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -5430,42 +5456,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64: {
- visitPatchpoint(I);
+ visitPatchpoint(&I);
return nullptr;
}
}
}
-void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
- bool isTailCall,
- MachineBasicBlock *LandingPad) {
- const TargetLowering *TLI = TM.getTargetLowering();
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- Type *RetTy = FTy->getReturnType();
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ MachineBasicBlock *LandingPad) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Args.reserve(CS.arg_size());
-
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- const Value *V = *i;
-
- // Skip empty types
- if (V->getType()->isEmptyTy())
- continue;
-
- SDValue ArgNode = getValue(V);
- Entry.Node = ArgNode; Entry.Ty = V->getType();
-
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
- Args.push_back(Entry);
- }
-
if (LandingPad) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
@@ -5486,24 +5488,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// this call might not return.
(void)getRoot();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
- }
- // Check if target-independent constraints permit a tail call here.
- // Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, DAG))
- isTailCall = false;
+ CLI.setChain(getRoot());
+ }
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
- std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
- assert((isTailCall || Result.second.getNode()) &&
+ assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
- if (Result.first.getNode())
- setValue(CS.getInstruction(), Result.first);
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
@@ -5526,6 +5521,50 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Inform MachineModuleInfo of range.
MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
}
+
+ return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ isTailCall = false;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+
+ if (Result.first.getNode())
+ setValue(CS.getInstruction(), Result.first);
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -5591,7 +5630,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
- EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
@@ -5616,7 +5655,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
- EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
setValue(&I, DAG.getConstant(0, CallVT));
return true;
}
@@ -5673,15 +5712,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// Require that we can find a legal MVT, and only do this if the target
// supports unaligned loads of that type. Expanding into byte loads would
// bloat the code.
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ActuallyDoIt && CSize->getZExtValue() > 4) {
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
- if (!TLI->isTypeLegal(LoadVT) ||
- !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
+ // TODO: Check alignment of src and dest ptrs.
+ if (!TLI.isTypeLegal(LoadVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
ActuallyDoIt = false;
}
@@ -5859,6 +5899,26 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
+/// visitBinaryFloatCall - If a call instruction is a binary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a binary floating-point call.
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp0 = getValue(I.getArgOperand(0));
+ SDValue Tmp1 = getValue(I.getArgOperand(1));
+ EVT VT = Tmp0.getValueType();
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ return true;
+}
+
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (isa<InlineAsm>(I.getCalledValue())) {
@@ -5915,6 +5975,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
+ case LibFunc::fmin:
+ case LibFunc::fminf:
+ case LibFunc::fminl:
+ if (visitBinaryFloatCall(I, ISD::FMINNUM))
+ return;
+ break;
+ case LibFunc::fmax:
+ case LibFunc::fmaxf:
+ case LibFunc::fmaxl:
+ if (visitBinaryFloatCall(I, ISD::FMAXNUM))
+ return;
+ break;
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
@@ -6021,7 +6093,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Callee = getValue(I.getCalledValue());
else
Callee = DAG.getExternalSymbol(RenameFn,
- TM.getTargetLowering()->getPointerTy());
+ DAG.getTargetLoweringInfo().getPointerTy());
// Check if we can potentially perform a tail call. More detailed checking is
// be done within LowerCallTo, after more information about the call is known.
@@ -6216,9 +6288,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI.ParseConstraints(CS);
bool hasMemory = false;
@@ -6243,10 +6315,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo));
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI->getSimpleValueType(CS.getType());
+ OpVT = TLI.getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -6267,8 +6339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL).
- getSimpleVT();
+ OpVT =
+ OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT();
}
OpInfo.ConstraintVT = OpVT;
@@ -6279,7 +6351,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
else {
for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
TargetLowering::ConstraintType
- CType = TLI->getConstraintType(OpInfo.Codes[j]);
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
if (CType == TargetLowering::C_Memory) {
hasMemory = true;
break;
@@ -6311,10 +6383,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI->getRegForInlineAsmConstraint(Input.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -6328,7 +6400,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber)
@@ -6356,16 +6428,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
- TLI->getPointerTy());
+ TLI.getPointerTy());
} else {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
Type *Ty = OpVal->getType();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
- unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurSDLoc(),
OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
@@ -6383,7 +6455,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo);
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -6394,7 +6466,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo);
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6402,7 +6474,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(
DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
@@ -6425,7 +6497,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// Ideally, we would only check against memory constraints. However, the
// meaning of an other constraint can be target-specific and we can't easily
@@ -6443,7 +6515,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -6465,7 +6537,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
}
@@ -6545,7 +6617,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT))
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
else {
LLVMContext &Ctx = *DAG.getContext();
@@ -6572,7 +6644,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
@@ -6584,7 +6656,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
- TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
LLVMContext &Ctx = *DAG.getContext();
@@ -6598,20 +6670,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
- assert(InOperandVal.getValueType() == TLI->getPointerTy() &&
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(InOperandVal);
break;
}
@@ -6674,7 +6746,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
- EVT ResultType = TLI->getValueType(CS.getType());
+ EVT ResultType = TLI.getValueType(CS.getType());
// If any of the results of the inline asm is a vector, it may have the
// wrong width/num elts. This can happen for register classes that can
@@ -6739,9 +6811,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
- const TargetLowering *TLI = TM.getTargetLowering();
- const DataLayout &DL = *TLI->getDataLayout();
- SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = *TLI.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlignment(I.getType()));
@@ -6773,18 +6845,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
std::pair<SDValue, SDValue>
-SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- bool useVoidTy) {
+ bool UseVoidTy,
+ MachineBasicBlock *LandingPad) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- ImmutableCallSite CS(&CI);
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
ArgI != ArgE; ++ArgI) {
- const Value *V = CI.getOperand(ArgI);
+ const Value *V = CS->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
@@ -6795,14 +6867,13 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
Args.push_back(Entry);
}
- Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+ Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
- .setDiscardResult(!CI.use_empty());
+ .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
+ .setDiscardResult(CS->use_empty());
- const TargetLowering *TLI = TM.getTargetLowering();
- return TLI->LowerCallTo(CLI);
+ return lowerInvokable(CLI, LandingPad);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6822,11 +6893,11 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
-static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
- for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
- SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+ for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CS.getArgument(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
@@ -6877,7 +6948,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
// Push live variables for the stack map.
- addStackMapLiveVars(CI, 2, Ops, *this);
+ addStackMapLiveVars(&CI, 2, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
@@ -6904,7 +6975,8 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
}
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
-void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+ MachineBasicBlock *LandingPad) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
@@ -6912,32 +6984,29 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// [Args...],
// [live variables...])
- CallingConv::ID CC = CI.getCallingConv();
- bool isAnyRegCC = CC == CallingConv::AnyReg;
- bool hasDef = !CI.getType()->isVoidTy();
- SDValue Callee = getValue(CI.getOperand(2)); // <target>
+ CallingConv::ID CC = CS.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CS->getType()->isVoidTy();
+ SDValue Callee = getValue(CS->getOperand(2)); // <target>
// Get the real number of arguments participating in the call <numArgs>
- SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+ SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
- unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
std::pair<SDValue, SDValue> Result =
- LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
-
- // Set the root to the target-lowered call chain.
- SDValue Chain = Result.second;
- DAG.setRoot(Chain);
+ lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
+ LandingPad);
- SDNode *CallEnd = Chain.getNode();
- if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ SDNode *CallEnd = Result.second.getNode();
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
@@ -6945,16 +7014,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
- bool hasGlue = Call->getGluedNode();
+ bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
- SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
@@ -6967,8 +7036,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
- unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
- NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+ unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
+ NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
// Add the calling convention
@@ -6976,20 +7045,20 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
- if (isAnyRegCC)
+ if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
- Ops.push_back(getValue(CI.getArgOperand(i)));
+ Ops.push_back(getValue(CS.getArgument(i)));
// Push the arguments from the call instruction up to the register mask.
- SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
Ops.push_back(*i);
// Push live variables for the stack map.
- addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
+ addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this);
// Push the register mask info.
- if (hasGlue)
+ if (HasGlue)
Ops.push_back(*(Call->op_end()-2));
else
Ops.push_back(*(Call->op_end()-1));
@@ -6999,15 +7068,15 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
Ops.push_back(*(Call->op_begin()));
// Push the glue flag (last operand).
- if (hasGlue)
+ if (HasGlue)
Ops.push_back(*(Call->op_end()-1));
SDVTList NodeTys;
- if (isAnyRegCC && hasDef) {
+ if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
- ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+ ComputeValueVTs(TLI, CS->getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
@@ -7022,18 +7091,18 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
getCurSDLoc(), NodeTys, Ops);
// Update the NodeMap.
- if (hasDef) {
- if (isAnyRegCC)
- setValue(&CI, SDValue(MN, 0));
+ if (HasDef) {
+ if (IsAnyRegCC)
+ setValue(CS.getInstruction(), SDValue(MN, 0));
else
- setValue(&CI, Result.first);
+ setValue(CS.getInstruction(), Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
- if (isAnyRegCC && hasDef) {
+ if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
@@ -7182,8 +7251,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7229,10 +7301,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7345,10 +7413,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
"Copy from a reg to the same reg!");
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
- const TargetLowering *TLI = TM.getTargetLowering();
- RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
+
+ ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
+ FuncInfo.PreferredExtendType.end())
+ ? ISD::ANY_EXTEND
+ : FuncInfo.PreferredExtendType[V];
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -7374,15 +7447,13 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
- const TargetLowering *TLI = getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*getTargetLowering(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
@@ -7447,8 +7518,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7461,11 +7535,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// if it isn't first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
-
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
@@ -7474,9 +7543,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
- SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
- F.isVarArg(), Ins,
- dl, DAG, InVals);
+ SDValue NewRoot = TLI->LowerFormalArguments(
+ DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
@@ -7513,8 +7581,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MachineRegisterInfo& RegInfo = MF.getRegInfo();
unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
- NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(),
- SRetReg, ArgValue);
+ NewRoot =
+ SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
@@ -7629,7 +7697,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
- if (!SuccsHandled.insert(SuccMBB)) continue;
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
@@ -7672,11 +7741,11 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Remember that this register needs to added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
- const TargetLowering *TLI = TM.getTargetLowering();
- ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT);
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
Reg += NumRegisters;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 84679f9..f74e652 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SELECTIONDAGBUILDER_H
-#define SELECTIONDAGBUILDER_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
@@ -21,6 +21,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
#include <vector>
namespace llvm {
@@ -200,7 +201,7 @@ private:
}
};
- size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+ void Clusterify(CaseVector &Cases, const SwitchInst &SI);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -276,9 +277,9 @@ private:
BitTestBlock(APInt F, APInt R, const Value* SV,
unsigned Rg, MVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
- const BitTestInfo& C):
+ BitTestInfo C):
First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- Parent(P), Default(D), Cases(C) { }
+ Parent(P), Default(D), Cases(std::move(C)) { }
APInt First;
APInt Range;
const Value *SValue;
@@ -397,7 +398,8 @@ private:
class StackProtectorDescriptor {
public:
StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
- FailureMBB(nullptr), Guard(nullptr) { }
+ FailureMBB(nullptr), Guard(nullptr),
+ GuardReg(0) { }
~StackProtectorDescriptor() { }
/// Returns true if all fields of the stack protector descriptor are
@@ -455,6 +457,9 @@ private:
MachineBasicBlock *getFailureMBB() { return FailureMBB; }
const Value *getGuard() { return Guard; }
+ unsigned getGuardReg() const { return GuardReg; }
+ void setGuardReg(unsigned R) { GuardReg = R; }
+
private:
/// The basic block for which we are generating the stack protector.
///
@@ -477,6 +482,9 @@ private:
/// stack protector stack slot.
const Value *Guard;
+ /// The virtual register holding the stack guard value.
+ unsigned GuardReg;
+
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
/// block will be created.
@@ -626,17 +634,23 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = nullptr);
- std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
- unsigned ArgIdx,
- unsigned NumArgs,
- SDValue Callee,
- bool useVoidTy = false);
+ std::pair<SDValue, SDValue> lowerCallOperands(
+ ImmutableCallSite CS,
+ unsigned ArgIdx,
+ unsigned NumArgs,
+ SDValue Callee,
+ bool UseVoidTy = false,
+ MachineBasicBlock *LandingPad = nullptr);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
private:
+ std::pair<SDValue, SDValue> lowerInvokable(
+ TargetLowering::CallLoweringInfo &CLI,
+ MachineBasicBlock *LandingPad);
+
// Terminator instructions.
void visitRet(const ReturnInst &I);
void visitBr(const BranchInst &I);
@@ -658,7 +672,6 @@ private:
bool handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
- MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
@@ -755,6 +768,7 @@ private:
bool visitStrLenCall(const CallInst &I);
bool visitStrNLenCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
@@ -767,7 +781,8 @@ private:
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
- void visitPatchpoint(const CallInst &I);
+ void visitPatchpoint(ImmutableCallSite CS,
+ MachineBasicBlock *LandingPad = nullptr);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -784,7 +799,7 @@ private:
/// EmitFuncArgumentDbgValue - If V is an function argument then create
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
int64_t Offset, bool IsIndirect,
const SDValue &N);
};
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index b3a452f..c9f6cff 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
std::string SDNode::getOperationName(const SelectionDAG *G) const {
@@ -36,7 +37,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "<<Unknown DAG Node>>";
if (isMachineOpcode()) {
if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
return TII->getName(getMachineOpcode());
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
@@ -140,6 +141,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Unary operators
case ISD::FABS: return "fabs";
+ case ISD::FMINNUM: return "fminnum";
+ case ISD::FMAXNUM: return "fmaxnum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
@@ -236,8 +239,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::CONVERT_RNDSAT: {
switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
@@ -433,7 +436,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr);
+ OS << ' ' << PrintReg(R->getReg(),
+ G ? G->getSubtarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -565,7 +569,7 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
+ if (!once.insert(N).second) // If we've been here before, return now.
return;
// Dump the current SDNode, but don't end the line yet.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 57e22e2..79109b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -284,8 +284,8 @@ namespace llvm {
/// for the target.
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetLowering *TLI = IS->getTargetLowering();
- const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetLowering *TLI = IS->TLI;
+ const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
TLI->getSchedulingPreference() == Sched::Source)
@@ -336,7 +336,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm),
- FuncInfo(new FunctionLoweringInfo(TM)),
+ FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
@@ -411,32 +411,32 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetLowering *TLI = TM.getTargetLowering();
-
MF = &mf;
- RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
- LibInfo = &getAnalysis<TargetLibraryInfo>();
- GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
-
- TargetSubtargetInfo &ST =
- const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
- ST.resetSubtargetFeatures(MF);
- TM.resetTargetOptions(MF);
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ TM.resetTargetOptions(Fn);
// Reset OptLevel to None for optnone functions.
CodeGenOpt::Level NewOptLevel = OptLevel;
if (Fn.hasFnAttribute(Attribute::OptimizeNone))
NewOptLevel = CodeGenOpt::None;
OptLevelChanger OLC(*this, NewOptLevel);
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF, TLI);
+ CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -454,7 +454,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
MachineBasicBlock *EntryMBB = MF->begin();
- RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
@@ -489,15 +490,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
"- add if needed");
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ const MDNode *Variable = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- LDI->second, Offset, Variable);
+ TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset,
+ Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
@@ -516,11 +516,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
if (CopyUseMI) {
MachineInstr *NewMI =
- BuildMI(*MF, CopyUseMI->getDebugLoc(),
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- CopyUseMI->getOperand(0).getReg(),
- Offset, Variable);
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
}
@@ -534,7 +532,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
break;
for (const auto &MI : MBB) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode());
+ const MCInstrDesc &MCID = TII->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
MI.isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
@@ -617,7 +615,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SDNode *N = Worklist.pop_back_val();
// If we've already seen this node, ignore it.
- if (!VisitedNodes.insert(N))
+ if (!VisitedNodes.insert(N).second)
continue;
// Otherwise, add all chain operands to the worklist.
@@ -901,12 +899,11 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
- const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- const TargetLowering *TLI = getTargetLowering();
const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
@@ -1042,7 +1039,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
- FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo);
+ FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1096,7 +1093,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
++NumEntryBlocks;
// Lower any arguments needed in this block if this is the entry block.
- if (!FastIS->LowerArguments()) {
+ if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
if (EnableFastISelAbortArgs)
@@ -1134,7 +1131,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(Inst)) {
+ if (FastIS->selectInstruction(Inst)) {
--NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
@@ -1729,7 +1726,7 @@ static SDNode *findGlueUse(SDNode *N) {
/// This function recursively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
bool IgnoreChains) {
// The NodeID's are given uniques ID's where a node ID is guaranteed to be
// greater than all of its (recursive) operands. If we scan to a point where
@@ -1744,7 +1741,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// Don't revisit nodes if we already scanned it and didn't fail, we know we
// won't fail if we scan it again.
- if (!Visited.insert(Use))
+ if (!Visited.insert(Use).second)
return false;
for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
@@ -1861,8 +1858,8 @@ SDNode
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = getTargetLowering()->getRegisterByName(
- RegStr->getString().data(), Op->getValueType(0));
+ unsigned Reg =
+ TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0));
SDValue New = CurDAG->getCopyFromReg(
CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -1874,8 +1871,8 @@ SDNode
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = getTargetLowering()->getRegisterByName(
- RegStr->getString().data(), Op->getOperand(2).getValueType());
+ unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Op->getOperand(2).getValueType());
SDValue New = CurDAG->getCopyToReg(
CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -2375,7 +2372,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckOpcode(Table, Index, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckType:
- Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering());
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
return Index;
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
@@ -2385,14 +2382,15 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckChild5Type:
case SelectionDAGISel::OPC_CheckChild6Type:
case SelectionDAGISel::OPC_CheckChild7Type:
- Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(),
- Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index - 1] -
+ SelectionDAGISel::OPC_CheckChild0Type);
return Index;
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckValueType:
- Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering());
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
return Index;
case SelectionDAGISel::OPC_CheckInteger:
Result = !::CheckInteger(Table, Index, N);
@@ -2436,6 +2434,42 @@ struct MatchScope {
bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
+/// \\brief A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to
+/// change the DAG while matching. X86 addressing mode matcher is an example
+/// for this.
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
+public:
+ MatchStateUpdater(SelectionDAG &DAG,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
+ SmallVectorImpl<MatchScope> &MS) :
+ SelectionDAG::DAGUpdateListener(DAG),
+ RecordedNodes(RN), MatchScopes(MS) { }
+
+ void NodeDeleted(SDNode *N, SDNode *E) {
+ // Some early-returns here to avoid the search if we deleted the node or
+ // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+ // do, so it's unnecessary to update matching state at that point).
+ // Neither of these can occur currently because we only install this
+ // update listener during matching a complex patterns.
+ if (!E || E->isMachineOpcode())
+ return;
+ // Performing linear search here does not matter because we almost never
+ // run this code. You'd have to have a CSE during complex pattern
+ // matching.
+ for (auto &I : RecordedNodes)
+ if (I.first.getNode() == N)
+ I.first.setNode(E);
+
+ for (auto &I : MatchScopes)
+ for (auto &J : I.NodeStack)
+ if (J.getNode() == N)
+ J.setNode(E);
+ }
+};
}
SDNode *SelectionDAGISel::
@@ -2449,8 +2483,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::BasicBlock:
case ISD::Register:
case ISD::RegisterMask:
- //case ISD::VALUETYPE:
- //case ISD::CONDCODE:
case ISD::HANDLENODE:
case ISD::MDNODE_SDNODE:
case ISD::TargetConstant:
@@ -2692,6 +2724,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+ // If target can modify DAG during matching, keep the matching state
+ // consistent.
+ std::unique_ptr<MatchStateUpdater> MSU;
+ if (ComplexPatternFuncMutatesDAG())
+ MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MatchScopes));
+
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
@@ -2703,7 +2743,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
case OPC_CheckType:
- if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI))
break;
continue;
@@ -2751,7 +2791,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = getTargetLowering()->getPointerTy();
+ CaseVT = TLI->getPointerTy();
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
@@ -2773,7 +2813,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckChild2Type: case OPC_CheckChild3Type:
case OPC_CheckChild4Type: case OPC_CheckChild5Type:
case OPC_CheckChild6Type: case OPC_CheckChild7Type:
- if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(),
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
Opcode-OPC_CheckChild0Type))
break;
continue;
@@ -2781,7 +2821,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckValueType:
- if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI))
break;
continue;
case OPC_CheckInteger:
@@ -2980,7 +3020,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
- if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy;
+ if (VT == MVT::iPTR)
+ VT = TLI->getPointerTy().SimpleTy;
VTs.push_back(VT);
}
@@ -3076,7 +3117,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (EmitNodeInfo & OPFL_MemRefs) {
// Only attach load or store memory operands if the generated
// instruction may load or store.
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ const MCInstrDesc &MCID = TII->get(TargetOpc);
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 42372a2..9aef5ed 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -31,13 +31,13 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
-/// NOTE: The constructor takes ownership of TLOF.
-TargetLowering::TargetLowering(const TargetMachine &tm,
- const TargetLoweringObjectFile *tlof)
- : TargetLoweringBase(tm, tlof) {}
+/// NOTE: The TargetMachine owns TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm)
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
@@ -2177,7 +2177,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2239,14 +2240,11 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
- InlineAsm::ConstraintInfoVector
- ConstraintInfos = IA->ParseConstraints();
-
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ ConstraintOperands.emplace_back(std::move(CI));
AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Update multiple alternative constraint count.
@@ -2325,7 +2323,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
// If we have multiple alternative constraints, select the best alternative.
- if (ConstraintInfos.size()) {
+ if (ConstraintOperands.size()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
@@ -2641,11 +2639,13 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
/// \brief Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold sdiv ops.");
+
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2673,38 +2673,36 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If d > 0 and m < 0, add the numerator
if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// If d < 0 and m > 0, subtract the numerator.
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
Q = DAG.getNode(ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(T.getNode());
+ Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
/// \brief Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold udiv ops.");
+
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2725,8 +2723,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
unsigned Shift = Divisor.countTrailingZeros();
Q = DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
// Get magic number for the shifted divisor.
magics = Divisor.lshr(Shift).magicu(Shift);
@@ -2744,8 +2741,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else
return SDValue(); // No mulhu or equvialent
- if (Created)
- Created->push_back(Q.getNode());
+
+ Created->push_back(Q.getNode());
if (magics.a == 0) {
assert(magics.s < Divisor.getBitWidth() &&
@@ -2754,15 +2751,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
return DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
}
@@ -2785,7 +2779,7 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SelectionDAG &DAG, SDValue LL, SDValue LH,
- SDValue RL, SDValue RH) const {
+ SDValue RL, SDValue RH) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2818,8 +2812,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// The inputs are both zero-extended.
if (HasUMUL_LOHI) {
// We can emit a umul_lohi.
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
@@ -2834,8 +2828,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// The input values are both sign-extended.
if (HasSMUL_LOHI) {
// We can emit a smul_lohi.
- Lo = DAG.getNode(ISD::SMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
@@ -2885,3 +2879,65 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
}
return false;
}
+
+bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ SDLoc dl(SDValue(Node, 0));
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (VT != MVT::f32 || NVT != MVT::i64)
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits());
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
+ SDValue Bias = DAG.getConstant(127, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+ IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+ SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT)));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT)));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+
+ R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, getShiftAmountTy(IntVT))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, getShiftAmountTy(IntVT))),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+ Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
+ DAG.getConstant(0, NVT), Ret, ISD::SETLT);
+ return true;
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index f7c64da..0be00f0 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -144,7 +144,7 @@ namespace {
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
- Type::getInt32Ty(C), NULL);
+ Type::getInt32Ty(C), nullptr);
Constant *PersFn =
F.getParent()->
getOrInsertFunction("__gcc_personality_v0",
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index d2f3955..7fd8107 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -98,7 +99,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
VoidPtrTy, // __personality
VoidPtrTy, // __lsda
ArrayType::get(VoidPtrTy, 5), // __jbuf
- NULL);
+ nullptr);
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
@@ -138,8 +139,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
/// we reach blocks we've already seen.
static void MarkBlocksLiveIn(BasicBlock *BB,
- SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
- if (!LiveBBs.insert(BB))
+ SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second)
return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
@@ -190,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
unsigned Align =
TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
@@ -249,34 +250,16 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
++AI) {
Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
-
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
- AfterAllocaInsPt);
- AI->replaceAllUsesWith(NC);
-
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However, we're
- // replacing it here with the same value it was constructed with. We do
- // this because the above replaceAllUsesWith() clobbered the operand, but
- // we want this one to remain.
- NC->setOperand(0, AI);
- }
+ // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(F.getContext());
+ Value *UndefValue = UndefValue::get(Ty);
+ Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue,
+ AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(SI);
+
+ // Reset the operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, AI);
}
}
@@ -368,10 +351,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
continue;
// Demote the PHIs to the stack.
- for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(),
- E = PHIsToDemote.end();
- I != E; ++I)
- DemotePHIToStack(*I);
+ for (PHINode *PN : PHIsToDemote)
+ DemotePHIToStack(PN);
// Move the landingpad instruction back to the top of the landing pad block.
LPI->moveBefore(UnwindBlock->begin());
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 24e94d1..97a5424 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -60,27 +61,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-namespace {
-static BlockFrequency Threshold;
-}
-
-/// Decision threshold. A node gets the output value 0 if the weighted sum of
-/// its inputs falls in the open interval (-Threshold;Threshold).
-static BlockFrequency getThreshold() { return Threshold; }
-
-/// \brief Set the threshold for a given entry frequency.
-///
-/// Set the threshold relative to \c Entry. Since the threshold is used as a
-/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
-/// threshold.
-static void setThreshold(const BlockFrequency &Entry) {
- // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
- // it. Divide by 2^13, rounding as appropriate.
- uint64_t Freq = Entry.getFrequency();
- uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
- Threshold = std::max(UINT64_C(1), Scaled);
-}
-
/// Node - Each edge bundle corresponds to a Hopfield node.
///
/// The node contains precomputed frequency data that only depends on the CFG,
@@ -126,9 +106,9 @@ struct SpillPlacement::Node {
/// clear - Reset per-query data, but preserve frequencies that only depend on
// the CFG.
- void clear() {
+ void clear(const BlockFrequency &Threshold) {
BiasN = BiasP = Value = 0;
- SumLinkWeights = getThreshold();
+ SumLinkWeights = Threshold;
Links.clear();
}
@@ -166,7 +146,7 @@ struct SpillPlacement::Node {
/// update - Recompute Value from Bias and Links. Return true when node
/// preference changes.
- bool update(const Node nodes[]) {
+ bool update(const Node nodes[], const BlockFrequency &Threshold) {
// Compute the weighted sum of inputs.
BlockFrequency SumN = BiasN;
BlockFrequency SumP = BiasP;
@@ -186,9 +166,9 @@ struct SpillPlacement::Node {
// 2. It helps tame rounding errors when the links nominally sum to 0.
//
bool Before = preferReg();
- if (SumN >= SumP + getThreshold())
+ if (SumN >= SumP + Threshold)
Value = -1;
- else if (SumP >= SumN + getThreshold())
+ else if (SumP >= SumN + Threshold)
Value = 1;
else
Value = 0;
@@ -227,7 +207,7 @@ void SpillPlacement::activate(unsigned n) {
if (ActiveNodes->test(n))
return;
ActiveNodes->set(n);
- nodes[n].clear();
+ nodes[n].clear(Threshold);
// Very large bundles usually come from big switches, indirect branches,
// landing pads, or loops with many 'continue' statements. It is difficult to
@@ -244,6 +224,18 @@ void SpillPlacement::activate(unsigned n) {
}
}
+/// \brief Set the threshold for a given entry frequency.
+///
+/// Set the threshold relative to \c Entry. Since the threshold is used as a
+/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
+/// threshold.
+void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
+ // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
+ // it. Divide by 2^13, rounding as appropriate.
+ uint64_t Freq = Entry.getFrequency();
+ uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
+ Threshold = std::max(UINT64_C(1), Scaled);
+}
/// addConstraints - Compute node biases and weights from a set of constraints.
/// Set a bit in NodeMask for each active node.
@@ -310,7 +302,7 @@ bool SpillPlacement::scanActiveBundles() {
Linked.clear();
RecentPositive.clear();
for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
- nodes[n].update(nodes);
+ nodes[n].update(nodes, Threshold);
// A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations.
if (nodes[n].mustSpill())
@@ -330,7 +322,7 @@ void SpillPlacement::iterate() {
// First update the recently positive nodes. They have likely received new
// negative bias that will turn them off.
while (!RecentPositive.empty())
- nodes[RecentPositive.pop_back_val()].update(nodes);
+ nodes[RecentPositive.pop_back_val()].update(nodes, Threshold);
if (Linked.empty())
return;
@@ -349,7 +341,7 @@ void SpillPlacement::iterate() {
iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
E = Linked.rend(); I != E; ++I) {
unsigned n = *I;
- if (nodes[n].update(nodes)) {
+ if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg())
RecentPositive.push_back(n);
@@ -363,7 +355,7 @@ void SpillPlacement::iterate() {
for (SmallVectorImpl<unsigned>::const_iterator I =
std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
unsigned n = *I;
- if (nodes[n].update(nodes)) {
+ if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg())
RecentPositive.push_back(n);
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 43fc7f5..622361e 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -24,8 +24,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
-#define LLVM_CODEGEN_SPILLPLACEMENT_H
+#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -40,7 +40,7 @@ class MachineBasicBlock;
class MachineLoopInfo;
class MachineBlockFrequencyInfo;
-class SpillPlacement : public MachineFunctionPass {
+class SpillPlacement : public MachineFunctionPass {
struct Node;
const MachineFunction *MF;
const EdgeBundles *bundles;
@@ -60,7 +60,11 @@ class SpillPlacement : public MachineFunctionPass {
SmallVector<unsigned, 8> RecentPositive;
// Block frequencies are computed once. Indexed by block number.
- SmallVector<BlockFrequency, 4> BlockFrequencies;
+ SmallVector<BlockFrequency, 8> BlockFrequencies;
+
+ /// Decision threshold. A node gets the output value 0 if the weighted sum of
+ /// its inputs falls in the open interval (-Threshold;Threshold).
+ BlockFrequency Threshold;
public:
static char ID; // Pass identification, replacement for typeid.
@@ -152,6 +156,7 @@ private:
void releaseMemory() override;
void activate(unsigned);
+ void setThreshold(const BlockFrequency &Entry);
};
} // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
deleted file mode 100644
index 0649448..0000000
--- a/lib/CodeGen/Spiller.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Spiller.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "spiller"
-
-namespace {
- enum SpillerName { trivial, inline_ };
-}
-
-static cl::opt<SpillerName>
-spillerOpt("spiller",
- cl::desc("Spiller to use: (default: standard)"),
- cl::Prefix,
- cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumValN(inline_, "inline", "inline spiller"),
- clEnumValEnd),
- cl::init(trivial));
-
-// Spiller virtual destructor implementation.
-Spiller::~Spiller() {}
-
-namespace {
-
-/// Utility class for spillers.
-class SpillerBase : public Spiller {
-protected:
- MachineFunctionPass *pass;
- MachineFunction *mf;
- VirtRegMap *vrm;
- LiveIntervals *lis;
- MachineFrameInfo *mfi;
- MachineRegisterInfo *mri;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- /// Construct a spiller base.
- SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
- : pass(&pass), mf(&mf), vrm(&vrm)
- {
- lis = &pass.getAnalysis<LiveIntervals>();
- mfi = mf.getFrameInfo();
- mri = &mf.getRegInfo();
- tii = mf.getTarget().getInstrInfo();
- tri = mf.getTarget().getRegisterInfo();
- }
-
- /// Add spill ranges for every use/def of the live interval, inserting loads
- /// immediately before each use, and stores after each def. No folding or
- /// remat is attempted.
- void trivialSpillEverywhere(LiveRangeEdit& LRE) {
- LiveInterval* li = &LRE.getParent();
-
- DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
-
- assert(li->weight != llvm::huge_valf &&
- "Attempting to spill already spilled value.");
-
- assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
- "Trying to spill a stack slot.");
-
- DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
-
- const TargetRegisterClass *trc = mri->getRegClass(li->reg);
- unsigned ss = vrm->assignVirt2StackSlot(li->reg);
-
- // Iterate over reg uses/defs.
- for (MachineRegisterInfo::reg_instr_iterator
- regItr = mri->reg_instr_begin(li->reg);
- regItr != mri->reg_instr_end();) {
-
- // Grab the use/def instr.
- MachineInstr *mi = &*regItr;
-
- DEBUG(dbgs() << " Processing " << *mi);
-
- // Step regItr to the next use/def instr.
- ++regItr;
-
- // Collect uses & defs for this instr.
- SmallVector<unsigned, 2> indices;
- bool hasUse = false;
- bool hasDef = false;
- for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
- MachineOperand &op = mi->getOperand(i);
- if (!op.isReg() || op.getReg() != li->reg)
- continue;
- hasUse |= mi->getOperand(i).isUse();
- hasDef |= mi->getOperand(i).isDef();
- indices.push_back(i);
- }
-
- // Create a new virtual register for the load and/or store.
- unsigned NewVReg = LRE.create();
-
- // Update the reg operands & kill flags.
- for (unsigned i = 0; i < indices.size(); ++i) {
- unsigned mopIdx = indices[i];
- MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(NewVReg);
- if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
- mop.setIsKill(true);
- }
- }
- assert(hasUse || hasDef);
-
- // Insert reload if necessary.
- MachineBasicBlock::iterator miItr(mi);
- if (hasUse) {
- MachineInstrSpan MIS(miItr);
-
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc,
- tri);
- lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr);
- }
-
- // Insert store if necessary.
- if (hasDef) {
- MachineInstrSpan MIS(miItr);
-
- tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg,
- true, ss, trc, tri);
- lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end());
- }
- }
- }
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// Spills any live range using the spill-everywhere method with no attempt at
-/// folding.
-class TrivialSpiller : public SpillerBase {
-public:
-
- TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : SpillerBase(pass, mf, vrm) {}
-
- void spill(LiveRangeEdit &LRE) override {
- // Ignore spillIs - we don't use it.
- trivialSpillEverywhere(LRE);
- }
-};
-
-} // end anonymous namespace
-
-void Spiller::anchor() { }
-
-llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm) {
- switch (spillerOpt) {
- case trivial: return new TrivialSpiller(pass, mf, vrm);
- case inline_: return createInlineSpiller(pass, mf, vrm);
- }
- llvm_unreachable("Invalid spiller optimization");
-}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index b7d5bea..08f99ec 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPILLER_H
-#define LLVM_CODEGEN_SPILLER_H
+#ifndef LLVM_LIB_CODEGEN_SPILLER_H
+#define LLVM_LIB_CODEGEN_SPILLER_H
namespace llvm {
@@ -31,11 +31,6 @@ namespace llvm {
};
- /// Create and return a spiller object, as specified on the command line.
- Spiller* createSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-
/// Create and return a spiller that will insert spill code directly instead
/// of deferring though VirtRegMap.
Spiller *createInlineSpiller(MachineFunctionPass &pass,
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 7d4f568..ea7b914 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -40,16 +40,11 @@ STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
// Split Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
- const LiveIntervals &lis,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli)
- : MF(vrm.getMachineFunction()),
- VRM(vrm),
- LIS(lis),
- Loops(mli),
- TII(*MF.getTarget().getInstrInfo()),
- CurLI(nullptr),
- LastSplitPoint(MF.getNumBlockIDs()) {}
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
UseSlots.clear();
@@ -321,22 +316,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa,
- LiveIntervals &lis,
- VirtRegMap &vrm,
+SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
MachineDominatorTree &mdt,
MachineBlockFrequencyInfo &mbfi)
- : SA(sa), LIS(lis), VRM(vrm),
- MRI(vrm.getMachineFunction().getRegInfo()),
- MDT(mdt),
- TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
- TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
- MBFI(mbfi),
- Edit(nullptr),
- OpenIdx(0),
- SpillMode(SM_Partition),
- RegAssign(Allocator)
-{}
+ : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
+ MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
+ RegAssign(Allocator) {}
void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit = &LRE;
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7048ee3..2e60c14 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPLITKIT_H
-#define LLVM_CODEGEN_SPLITKIT_H
+#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
+#define LLVM_LIB_CODEGEN_SPLITKIT_H
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 370430c..dcf1b44 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -228,7 +228,7 @@ void StackColoring::dump() const {
unsigned StackColoring::collectMarkers(unsigned NumSlot) {
unsigned MarkersFound = 0;
// Scan the function to find all lifetime markers.
- // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
// deterministic numbering, and because we'll need a post-order iteration
// later for solving the liveness dataflow problem.
for (MachineBasicBlock *MBB : depth_first(MF)) {
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 3ba502f..c2ee87a 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -67,7 +67,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
<< _MF.getName() << " **********\n");
MF = &_MF;
- TRI = MF->getTarget().getRegisterInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 1473fc1..d3791c3 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <iterator>
using namespace llvm;
@@ -83,7 +84,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
+ unsigned Size =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
@@ -122,7 +124,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC =
- AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg());
+ AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass(
+ MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
Locs.push_back(
Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
@@ -158,7 +161,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
StackMaps::LiveOutVec
StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
assert(Mask && "No register mask specified");
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
LiveOutVec LiveOuts;
// Create a LiveOutReg for each bit that is set in the register mask.
@@ -217,9 +220,18 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
I != E; ++I) {
// Constants are encoded as sign-extended integers.
// -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
- if (I->LocType == Location::Constant &&
- ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) {
+ if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) {
I->LocType = Location::ConstantIndex;
+ // ConstPool is intentionally a MapVector of 'uint64_t's (as
+ // opposed to 'int64_t's). We should never be in a situation
+ // where we have to insert either the tombstone or the empty
+ // keys into a map, and for a DenseMap<uint64_t, T> these are
+ // (uint64_t)0 and (uint64_t)-1. They can be and are
+ // represented using 32 bit integers.
+
+ assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
+ (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() &&
+ "empty and tombstone keys should fit in 32 bits!");
auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset));
I->Offset = Result.first - ConstPool.begin();
}
@@ -232,12 +244,16 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
OutContext);
- CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts));
+ CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
+ std::move(LiveOuts));
// Record the stack size of the current function.
const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+ const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ RegInfo->needsStackRealignment(*(AP.MF));
FnStackSize[AP.CurrentFnSym] =
- MFI->hasVarSizedObjects() ? UINT64_MAX : MFI->getStackSize();
+ DynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
}
void StackMaps::recordStackMap(const MachineInstr &MI) {
@@ -485,7 +501,7 @@ void StackMaps::serializeToStackMapSection() {
MCContext &OutContext = AP.OutStreamer.getContext();
MCStreamer &OS = AP.OutStreamer;
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
// Create the section.
const MCSection *StackMapSection =
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index accfe7b..45f97ac 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
using namespace llvm;
@@ -85,7 +86,7 @@ bool StackProtector::runOnFunction(Function &Fn) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- TLI = TM->getTargetLowering();
+ TLI = TM->getSubtargetImpl()->getTargetLowering();
Attribute Attr = Fn.getAttributes().getAttribute(
AttributeSet::FunctionIndex, "stack-protector-buffer-size");
@@ -168,7 +169,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// Keep track of what PHI nodes we have already visited to ensure
// they are only visited once.
- if (VisitedPHIs.insert(PN))
+ if (VisitedPHIs.insert(PN).second)
if (HasAddressTaken(PN))
return true;
} else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
@@ -479,12 +480,12 @@ BasicBlock *StackProtector::CreateFailBB() {
if (Trip.getOS() == llvm::Triple::OpenBSD) {
Constant *StackChkFail = M->getOrInsertFunction(
"__stack_smash_handler", Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), NULL);
+ Type::getInt8PtrTy(Context), nullptr);
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail = M->getOrInsertFunction(
- "__stack_chk_fail", Type::getVoidTy(Context), NULL);
+ "__stack_chk_fail", Type::getVoidTy(Context), nullptr);
B.CreateCall(StackChkFail);
}
B.CreateUnreachable();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 791168f..cc72e5e 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -28,7 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <vector>
using namespace llvm;
@@ -422,7 +422,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
});
MFI = MF.getFrameInfo();
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
LS = &getAnalysis<LiveStacks>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 723a629..4377236 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "tailduplication"
@@ -135,8 +136,8 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -798,11 +799,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
RS->enterBasicBlock(PredBB);
if (!PredBB->empty())
RS->forward(std::prev(PredBB->end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
E = TailBB->livein_end(); I != E; ++I) {
- if (!RegsLiveAtExit[*I])
+ if (!RS->isRegUsed(*I, false))
// If a register is previously livein to the tail but it's not live
// at the end of predecessor BB, then it should be added to its
// livein list.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 883e9d1..1557d10 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -14,8 +14,8 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
using namespace llvm;
@@ -26,7 +26,7 @@ TargetFrameLowering::~TargetFrameLowering() {
/// the stack frame of the specified index. This is the default implementation
/// which is overridden for some targets.
int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+ int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getObjectOffset(FI) + MFI->getStackSize() -
getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
@@ -34,7 +34,7 @@ int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
- const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
// getFrameRegister() says. The target can override this if it's doing
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 83966bd0..ab45f89 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -290,13 +290,15 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
Offset = 0;
return true;
}
- unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ unsigned BitSize =
+ TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
if (BitSize % 8)
return false;
- int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ int BitOffset =
+ TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
if (BitOffset < 0 || BitOffset % 8)
return false;
@@ -305,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getDataLayout()->isLittleEndian()) {
+ if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -370,6 +372,10 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
return nullptr;
}
+void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ llvm_unreachable("Not a MachO target");
+}
+
bool TargetInstrInfo::
canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
@@ -498,7 +504,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const MachineOperand &MO = MI->getOperand(1-Ops[0]);
MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (Flags == MachineMemOperand::MOStore)
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
@@ -562,8 +568,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
AliasAnalysis *AA) const {
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetMachine &TM = MF.getTarget();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
@@ -582,7 +586,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
// redundant with subsequent checks, but it's target-independent,
// simple, and a common case.
int FrameIdx = 0;
- if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ if (isLoadFromStackSlot(MI, FrameIdx) &&
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
@@ -655,8 +659,8 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
return true;
@@ -746,14 +750,14 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
}
/// Return the default expected latency for a def based on it's opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
const MachineInstr *DefMI) const {
if (DefMI->isTransient())
return 0;
if (DefMI->mayLoad())
- return SchedModel->LoadLatency;
+ return SchedModel.LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
- return SchedModel->HighLatency;
+ return SchedModel.HighLatency;
return 1;
}
@@ -852,3 +856,77 @@ computeOperandLatency(const InstrItineraryData *ItinData,
defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
+
+bool TargetInstrInfo::getRegSequenceInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert((MI.isRegSequence() ||
+ MI.isRegSequenceLike()) && "Instruction do not have the proper type");
+
+ if (!MI.isRegSequence())
+ return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ assert(DefIdx == 0 && "REG_SEQUENCE only has one def");
+ for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ OpIdx += 2) {
+ const MachineOperand &MOReg = MI.getOperand(OpIdx);
+ const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ // Record Reg:SubReg, SubIdx.
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(),
+ (unsigned)MOSubIdx.getImm()));
+ }
+ return true;
+}
+
+bool TargetInstrInfo::getExtractSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert((MI.isExtractSubreg() ||
+ MI.isExtractSubregLike()) && "Instruction do not have the proper type");
+
+ if (!MI.isExtractSubreg())
+ return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
+
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0.sub1, sub0.
+ assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
+ const MachineOperand &MOReg = MI.getOperand(1);
+ const MachineOperand &MOSubIdx = MI.getOperand(2);
+ assert(MOSubIdx.isImm() &&
+ "The subindex of the extract_subreg is not an immediate");
+
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
+
+bool TargetInstrInfo::getInsertSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
+ assert((MI.isInsertSubreg() ||
+ MI.isInsertSubregLike()) && "Instruction do not have the proper type");
+
+ if (!MI.isInsertSubreg())
+ return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
+
+ // We are looking at:
+ // Def = INSERT_SEQUENCE v0, v1, sub0.
+ assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOSubIdx = MI.getOperand(3);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index c574fd4..e833fd3 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -34,6 +34,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -205,6 +206,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FLOOR_F80] = "floorl";
Names[RTLIB::FLOOR_F128] = "floorl";
Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FMIN_F32] = "fminf";
+ Names[RTLIB::FMIN_F64] = "fmin";
+ Names[RTLIB::FMIN_F80] = "fminl";
+ Names[RTLIB::FMIN_F128] = "fminl";
+ Names[RTLIB::FMIN_PPCF128] = "fminl";
+ Names[RTLIB::FMAX_F32] = "fmaxf";
+ Names[RTLIB::FMAX_F64] = "fmax";
+ Names[RTLIB::FMAX_F80] = "fmaxl";
+ Names[RTLIB::FMAX_F128] = "fmaxl";
+ Names[RTLIB::FMAX_PPCF128] = "fmaxl";
Names[RTLIB::ROUND_F32] = "roundf";
Names[RTLIB::ROUND_F64] = "round";
Names[RTLIB::ROUND_F80] = "roundl";
@@ -220,6 +231,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
+ Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
+ Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
+ Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2";
Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
@@ -418,7 +433,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_F16_F32;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
if (RetVT == MVT::f128)
@@ -434,7 +452,18 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
- if (RetVT == MVT::f32) {
+ if (RetVT == MVT::f16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_F16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F16;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F16;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F16;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::f32) {
if (OpVT == MVT::f64)
return FPROUND_F64_F32;
if (OpVT == MVT::f80)
@@ -665,10 +694,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::O_F128] = ISD::SETEQ;
}
-/// NOTE: The constructor takes ownership of TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
- const TargetLoweringObjectFile *tlof)
- : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) {
+/// NOTE: The TargetMachine owns TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
+ : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) {
initActions();
// Perform these initializations only once.
@@ -682,10 +710,11 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
IntDivIsCheap = false;
- Pow2DivIsCheap = false;
+ Pow2SDivIsCheap = false;
JumpIsExpensive = false;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
+ HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -700,7 +729,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
PrefLoopAlignment = 0;
MinStackArgumentAlignment = 1;
InsertFencesForAtomic = false;
- SupportJumpTables = true;
MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
@@ -708,10 +736,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
InitLibcallCallingConvs(LibcallCallingConvs);
}
-TargetLoweringBase::~TargetLoweringBase() {
- delete &TLOF;
-}
-
void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
@@ -738,6 +762,8 @@ void TargetLoweringBase::initActions() {
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMINNUM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMAXNUM, (MVT::SimpleValueType)VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
@@ -774,6 +800,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
setOperationAction(ISD::FCEIL, MVT::f16, Expand);
setOperationAction(ISD::FRINT, MVT::f16, Expand);
@@ -785,6 +813,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f32, Expand);
setOperationAction(ISD::FEXP2, MVT::f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
setOperationAction(ISD::FCEIL, MVT::f32, Expand);
setOperationAction(ISD::FRINT, MVT::f32, Expand);
@@ -796,6 +826,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
@@ -807,6 +839,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f128, Expand);
setOperationAction(ISD::FEXP2, MVT::f128, Expand);
setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f128, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
setOperationAction(ISD::FCEIL, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
@@ -958,11 +992,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Add a new memory operand for this FI.
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- TM.getDataLayout()->getPointerSize(),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(),
+ MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -978,7 +1011,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
TargetLoweringBase::findRepresentativeClass(MVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
@@ -1005,8 +1039,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const {
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties() {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
// Everything defaults to needing one register.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
@@ -1089,6 +1123,13 @@ void TargetLoweringBase::computeRegisterProperties() {
}
}
+ if (!isTypeLegal(MVT::f16)) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+ TransformToType[MVT::f16] = MVT::i16;
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+ }
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1154,8 +1195,12 @@ void TargetLoweringBase::computeRegisterProperties() {
TransformToType[i] = MVT::Other;
if (PreferredAction == TypeScalarizeVector)
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
- else
+ else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else
+ // Set type action according to the number of elements.
+ ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
+ : TypeSplitVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 03f4a51..efd15e1 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
using namespace dwarf;
@@ -72,9 +73,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(
+ TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -287,7 +289,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV));
const char *SizeSpec = ".rodata.str1.";
if (Kind.isMergeable2ByteCString())
@@ -338,8 +341,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
/// getSectionForConstant - Given a mergeable constant with the
/// specified size and relocation information, return a section that it
/// should be placed in.
-const MCSection *TargetLoweringObjectFileELF::
-getSectionForConstant(SectionKind Kind) const {
+const MCSection *
+TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
@@ -354,44 +358,59 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- // The default scheme is .ctor / .dtor, so we have to invert the priority
- // numbering.
- if (Priority == 65535)
- return StaticCtorSection;
+static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
+ bool UseInitArray,
+ bool IsCtor,
+ unsigned Priority,
+ const MCSymbol *KeySym) {
+ std::string Name;
+ unsigned Type;
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ SectionKind Kind = SectionKind::getDataRel();
+ StringRef COMDAT = KeySym ? KeySym->getName() : "";
+
+ if (KeySym)
+ Flags |= ELF::SHF_GROUP;
if (UseInitArray) {
- std::string Name = std::string(".init_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (IsCtor) {
+ Type = ELF::SHT_INIT_ARRAY;
+ Name = ".init_array";
+ } else {
+ Type = ELF::SHT_FINI_ARRAY;
+ Name = ".fini_array";
+ }
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(Priority);
+ }
} else {
- std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (IsCtor)
+ Name = ".ctors";
+ else
+ Name = ".dtors";
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(65535 - Priority);
+ }
+ Type = ELF::SHT_PROGBITS;
}
+
+ return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT);
}
-const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- // The default scheme is .ctor / .dtor, so we have to invert the priority
- // numbering.
- if (Priority == 65535)
- return StaticDtorSection;
+ return getStaticStructorSection(getContext(), UseInitArray, true, Priority,
+ KeySym);
+}
- if (UseInitArray) {
- std::string Name = std::string(".fini_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
- } else {
- std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
- }
+const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, false, Priority,
+ KeySym);
}
void
@@ -565,10 +584,29 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
if (SMO.getKind().isMergeable1ByteCString())
return false;
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_classname" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_methname" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_methtype" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
if (SMO.getSegmentName() == "__DATA" &&
SMO.getSectionName() == "__cfstring")
return false;
+ // no_dead_strip sections are not atomized in practice.
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return false;
+
switch (SMO.getType()) {
default:
return true;
@@ -610,17 +648,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return UStringSection;
- if (Kind.isMergeableConst()) {
+ // With MachO only variables whose corresponding symbol starts with 'l' or
+ // 'L' can be merged, so we only try merging GVs with private linkage.
+ if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) {
if (Kind.isMergeableConst4())
return FourByteConstantSection;
if (Kind.isMergeableConst8())
@@ -654,7 +696,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
}
const MCSection *
-TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
@@ -737,7 +780,7 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_MEM_EXECUTE |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_CNT_CODE;
- else if (K.isBSS ())
+ else if (K.isBSS())
Flags |=
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
@@ -747,7 +790,7 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE;
- else if (K.isReadOnly())
+ else if (K.isReadOnly() || K.isReadOnlyWithRel())
Flags |=
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ;
@@ -772,7 +815,7 @@ static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
if (ComdatGV->getComdat() != C)
report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
- "' is not a key for it's COMDAT.");
+ "' is not a key for its COMDAT.");
return ComdatGV;
}
@@ -841,9 +884,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
return ".bss";
if (Kind.isThreadLocal())
return ".tls$";
- if (Kind.isWriteable())
- return ".data";
- return ".rdata";
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ".rdata";
+ return ".data";
}
@@ -891,7 +934,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isThreadLocal())
return TLSDataSection;
- if (Kind.isReadOnly())
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
return ReadOnlySection;
// Note: we claim that common symbols are put in BSSSection, but they are
@@ -958,29 +1001,14 @@ emitModuleFlags(MCStreamer &Streamer,
}
}
-static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
- const MCSection *Sec,
- const MCSymbol *KeySym) {
- // Return the normal section if we don't have to be associative.
- if (!KeySym)
- return Sec;
-
- // Make an associative section with the same name and kind as the normal
- // section.
- const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
- unsigned Characteristics =
- SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
- return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
- SecCOFF->getKind(), KeySym->getName(),
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
-}
-
const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticCtorSection), KeySym);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticDtorSection), KeySym);
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 3ca2017..618d903 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -51,3 +51,10 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
StringRef TargetOptions::getTrapFunctionName() const {
return TrapFuncName;
}
+
+/// getCFIFuncName - If this returns a non-empty string, then it is the name of
+/// the function that gets called on CFI violations in CFI non-enforcing mode
+/// (!TargetOptions::CFIEnforcing).
+StringRef TargetOptions::getCFIFuncName() const {
+ return CFIFuncName;
+}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index a3a4fb3..61a66b6 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -108,7 +109,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
/// register of the given type, picking the most sub register class of
/// the right type that contains this physreg.
const TargetRegisterClass *
-TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
assert(isPhysicalRegister(reg) && "reg must be a physical register");
// Pick the most sub register class of the right type that contains
@@ -293,3 +294,11 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// All clear, tell the register allocator to prefer this register.
Hints.push_back(Phys);
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void
+TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
+ dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
+}
+#endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index b0f2ca6..ef2dab1 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -157,7 +156,7 @@ unsigned TargetSchedModel::computeOperandLatency(
const MachineInstr *UseMI, unsigned UseOperIdx) const {
if (!hasInstrSchedModel() && !hasInstrItineraries())
- return TII->defaultDefLatency(&SchedModel, DefMI);
+ return TII->defaultDefLatency(SchedModel, DefMI);
if (hasInstrItineraries()) {
int OperLatency = 0;
@@ -181,7 +180,7 @@ unsigned TargetSchedModel::computeOperandLatency(
// applicable to the InstrItins model. InstrSchedModel should model all
// special cases without TII hooks.
InstrLatency = std::max(InstrLatency,
- TII->defaultDefLatency(&SchedModel, DefMI));
+ TII->defaultDefLatency(SchedModel, DefMI));
return InstrLatency;
}
// hasInstrSchedModel()
@@ -222,7 +221,29 @@ unsigned TargetSchedModel::computeOperandLatency(
// FIXME: Automatically giving all implicit defs defaultDefLatency is
// undesirable. We should only do it for defs that are known to the MC
// desc like flags. Truly implicit defs should get 1 cycle latency.
- return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI);
+}
+
+unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
+ assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
+
+ unsigned SCIdx = TII->get(Opcode).getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
+ unsigned Latency = 0;
+
+ if (SCDesc->isValid() && !SCDesc->isVariant()) {
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, capLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+
+ assert(Latency && "No MI sched latency");
+ return 0;
}
unsigned
@@ -248,7 +269,7 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
return Latency;
}
}
- return TII->defaultDefLatency(&SchedModel, MI);
+ return TII->defaultDefLatency(SchedModel, MI);
}
unsigned TargetSchedModel::
@@ -268,7 +289,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// for predicated defs.
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
return computeInstrLatency(DefMI);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index f42d47b..e218a83 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "twoaddrinstr"
@@ -544,10 +545,21 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (ToRegA) {
unsigned FromRegB = getMappedReg(regB, SrcRegMap);
unsigned FromRegC = getMappedReg(regC, SrcRegMap);
- bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
- bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
- if (BComp != CComp)
- return !BComp && CComp;
+ bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
+
+ // Compute if any of the following are true:
+ // -RegB is not tied to a register and RegC is compatible with RegA.
+ // -RegB is tied to the wrong physical register, but RegC is.
+ // -RegB is tied to the wrong physical register, and RegC isn't tied.
+ if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC)))
+ return true;
+ // Don't compute if any of the following are true:
+ // -RegC is not tied to a register and RegB is compatible with RegA.
+ // -RegC is tied to the wrong physical register, but RegB is.
+ // -RegC is tied to the wrong physical register, and RegB isn't tied.
+ if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB)))
+ return false;
}
// If there is a use of regC between its last def (could be livein) and this
@@ -666,7 +678,7 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) {
unsigned Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
NewReg, IsDstPhys)) {
- if (IsCopy && !Processed.insert(UseMI))
+ if (IsCopy && !Processed.insert(UseMI).second)
break;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
@@ -1503,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
MRI = &MF->getRegInfo();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
- InstrItins = TM.getInstrItineraryData();
+ TII = TM.getSubtargetImpl()->getInstrInfo();
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ InstrItins = TM.getSubtargetImpl()->getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 2e22082..7824f92 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -64,9 +64,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
SmallPtrSet<BasicBlock*, 8> Reachable;
// Mark all reachable blocks.
- for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
- df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
- /* Mark all reachable blocks */;
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
// Loop over all dead blocks, remembering them and deleting all instructions
// in them.
@@ -125,10 +124,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
// Mark all reachable blocks.
- for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
- I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
- I != E; ++I)
- /* Mark all reachable blocks */;
+ for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
// Loop over all dead blocks, remembering them and deleting all instructions
// in them.
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 704736f..0d17d43 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -54,8 +55,8 @@ INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
- TII = mf.getTarget().getInstrInfo();
- TRI = mf.getTarget().getRegisterInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
MF = &mf;
Virt2PhysMap.clear();
@@ -123,7 +124,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
OS << '[' << PrintReg(Reg, TRI) << " -> "
<< PrintReg(Virt2PhysMap[Reg], TRI) << "] "
- << MRI->getRegClass(Reg)->getName() << "\n";
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
@@ -131,7 +132,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
- << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
OS << '\n';
@@ -205,8 +206,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
TM = &MF->getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
MRI = &MF->getRegInfo();
Indexes = &getAnalysis<SlotIndexes>();
LIS = &getAnalysis<LiveIntervals>();