Diffstat (limited to 'include/llvm/Target/TargetLowering.h')
-rw-r--r--  include/llvm/Target/TargetLowering.h  | 272
1 file changed, 212 insertions(+), 60 deletions(-)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 5e9978d..882dab4 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -31,6 +31,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetCallingConv.h"
@@ -136,10 +137,9 @@ public:
llvm_unreachable("Invalid content kind");
}
- /// NOTE: The constructor takes ownership of TLOF.
- explicit TargetLoweringBase(const TargetMachine &TM,
- const TargetLoweringObjectFile *TLOF);
- virtual ~TargetLoweringBase();
+ /// NOTE: The TargetMachine owns TLOF.
+ explicit TargetLoweringBase(const TargetMachine &TM);
+ virtual ~TargetLoweringBase() {}
protected:
/// \brief Initialize all of the actions to default values.
@@ -148,7 +148,9 @@ protected:
public:
const TargetMachine &getTargetMachine() const { return TM; }
const DataLayout *getDataLayout() const { return DL; }
- const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
+ const TargetLoweringObjectFile &getObjFileLowering() const {
+ return *TM.getObjFileLowering();
+ }
bool isBigEndian() const { return !IsLittleEndian; }
bool isLittleEndian() const { return IsLittleEndian; }
@@ -223,8 +225,8 @@ public:
return BypassSlowDivWidths;
}
- /// Return true if pow2 div is cheaper than a chain of srl/add/sra.
- bool isPow2DivCheap() const { return Pow2DivIsCheap; }
+ /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra.
+ bool isPow2SDivCheap() const { return Pow2SDivIsCheap; }
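
For reference, the sra/srl/add/sra chain that a target avoids by setting Pow2SDivIsCheap, shown as a source-level sketch for i32 divided by 8 (the helper is illustrative and assumes arithmetic right shift of signed values, as on mainstream targets; it is not part of this header):

#include <cstdint>

// Round-toward-zero signed division by 8 without a divide instruction.
int32_t sdivByEight(int32_t X) {
  int32_t Sign = X >> 31;                 // sra: 0 if X >= 0, -1 otherwise
  uint32_t Bias = uint32_t(Sign) >> 29;   // srl: 7 for negative X, else 0
  return (X + int32_t(Bias)) >> 3;        // add, then sra by log2(8)
}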
/// Return true if Flow Control is an expensive operation that should be
/// avoided.
@@ -262,10 +264,27 @@ public:
return MaskAndBranchFoldingIsLegal;
}
- /// Return the ValueType of the result of SETCC operations. Also used to
- /// obtain the target's preferred type for the condition operand of SELECT and
- /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
- /// since there are no other operands to get a type hint from.
+ /// Return true if the target can combine store(extractelement VectorTy,
+ /// Idx).
+ /// \p Cost[out] gives the cost of that transformation when this is true.
+ virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ return false;
+ }
+
+ /// Return true if the target supports floating-point exceptions.
+ bool hasFloatingPointExceptions() const {
+ return HasFloatingPointExceptions;
+ }
+
+ /// Return true if the target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ virtual bool enableAggressiveFMAFusion(EVT VT) const {
+ return false;
+ }
+
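
A minimal sketch of how a backend could wire up the two hooks above, assuming a hypothetical MyTargetLowering subclass; the cost figures are invented for illustration:

class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

  // FMA costs no more than FADD on this hypothetical target, so forming it
  // aggressively is always profitable for legal floating-point types.
  bool enableAggressiveFMAFusion(EVT VT) const override {
    return VT.isFloatingPoint();
  }

  // store(extractelement VectorTy, Idx) folds into a single lane store.
  bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                 unsigned &Cost) const override {
    Cost = 1; // one store-lane instruction
    return true;
  }
};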
+ /// Return the ValueType of the result of SETCC operations.
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
/// Return the ValueType for comparison libcalls. Comparison libcalls include
@@ -426,10 +445,15 @@ public:
EVT memVT; // memory VT
const Value* ptrVal; // value representing memory location
int offset; // offset off of ptrVal
+ unsigned size; // the size of the memory location
+ // (taken from memVT if zero)
unsigned align; // alignment
bool vol; // is volatile?
bool readMem; // reads memory?
bool writeMem; // writes memory?
+
+ IntrinsicInfo() : opc(0), ptrVal(nullptr), offset(0), size(0), align(1),
+ vol(false), readMem(false), writeMem(false) {}
};
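
A sketch of a getTgtMemIntrinsic() override filling in the extended struct; the intrinsic ID and its properties are invented, and size is left at 0 so it is derived from memVT:

bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                          const CallInst &I,
                                          unsigned IntrID) const {
  if (IntrID != Intrinsic::mytarget_vload128) // hypothetical intrinsic
    return false;
  Info.opc = ISD::INTRINSIC_W_CHAIN; // produces a value, carries a chain
  Info.memVT = MVT::v4i32;           // what the instruction actually loads
  Info.ptrVal = I.getArgOperand(0);  // the pointer operand
  Info.offset = 0;
  Info.size = 0;                     // 0: take the size from memVT
  Info.align = 16;
  Info.vol = false;
  Info.readMem = true;
  Info.writeMem = false;
  return true;
}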
/// Given an intrinsic, checks if on the target the intrinsic will need to map
@@ -517,10 +541,12 @@ public:
/// Return how this load with extension should be treated: either it is legal,
/// needs to be promoted to a larger size, needs to be expanded to some other
/// code sequence, or the target has a custom expander for it.
- LegalizeAction getLoadExtAction(unsigned ExtType, MVT VT) const {
- assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
+ LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const {
+ if (VT.isExtended()) return Expand;
+ unsigned I = (unsigned) VT.getSimpleVT().SimpleTy;
+ assert(ExtType < ISD::LAST_LOADEXT_TYPE && I < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)LoadExtActions[VT.SimpleTy][ExtType];
+ return (LegalizeAction)LoadExtActions[I][ExtType];
}
/// Return true if the specified load with extension is legal on this target.
@@ -532,11 +558,13 @@ public:
/// Return how this store with truncation should be treated: either it is
/// legal, needs to be promoted to a larger size, needs to be expanded to some
/// other code sequence, or the target has a custom expander for it.
- LegalizeAction getTruncStoreAction(MVT ValVT, MVT MemVT) const {
- assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
+ LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
+ if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
+ unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
+ unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
+ assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)TruncStoreActions[ValVT.SimpleTy]
- [MemVT.SimpleTy];
+ return (LegalizeAction)TruncStoreActions[ValI][MemI];
}
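
A small illustration of the relaxed queries above: extended (non-simple) value types no longer have to be filtered out by the caller, they simply report Expand. The 48-bit type and the helper exist only for demonstration:

void checkExtendedTypeQueries(const TargetLoweringBase &TLI,
                              LLVMContext &Ctx) {
  EVT OddVT = EVT::getIntegerVT(Ctx, 48);   // extended: no simple MVT exists
  assert(TLI.getLoadExtAction(ISD::SEXTLOAD, OddVT) ==
         TargetLoweringBase::Expand);
  assert(TLI.getTruncStoreAction(EVT::getIntegerVT(Ctx, 64), OddVT) ==
         TargetLoweringBase::Expand);
}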
/// Return true if the specified store with truncation is legal on this
@@ -773,14 +801,15 @@ public:
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
- /// whether the unaligned memory access is "fast" in the third argument by
+ /// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
- virtual bool allowsUnalignedMemoryAccesses(EVT,
- unsigned AddrSpace = 0,
- bool * /*Fast*/ = nullptr) const {
+ virtual bool allowsMisalignedMemoryAccesses(EVT,
+ unsigned AddrSpace = 0,
+ unsigned Align = 1,
+ bool * /*Fast*/ = nullptr) const {
return false;
}
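
A hedged sketch of an override of the renamed hook, for an invented target where 32- and 64-bit misaligned scalar accesses are legal but only fast when at least 4-byte aligned:

bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      bool *Fast) const {
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;                 // other types must be naturally aligned
  if (Fast)
    *Fast = (Align % 4) == 0;     // the new Align parameter feeds the answer
  return true;
}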
@@ -823,11 +852,6 @@ public:
return UseUnderscoreLongJmp;
}
- /// Return whether the target can generate code for jump tables.
- bool supportJumpTables() const {
- return SupportJumpTables;
- }
-
/// Return integer threshold on number of blocks to use jump tables rather
/// than if sequence.
int getMinimumJumpTableEntries() const {
@@ -922,9 +946,13 @@ public:
/// @}
//===--------------------------------------------------------------------===//
- /// \name Helpers for load-linked/store-conditional atomic expansion.
+ /// \name Helpers for atomic expansion.
/// @{
+ /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
+ /// and expand AtomicCmpXchgInst.
+ virtual bool hasLoadLinkedStoreConditional() const { return false; }
+
/// Perform a load-linked operation on Addr, returning a "Value *" with the
/// corresponding pointee type. This may entail some non-trivial operations to
/// truncate or reconstruct types that will be illegal in the backend. See
@@ -941,15 +969,90 @@ public:
llvm_unreachable("Store conditional unimplemented on this target");
}
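
A sketch of a backend opting into LL/SC expansion through these hooks; the exclusive load/store intrinsics are stand-ins for real ones such as ARM's ldrex/strex:

bool MyTargetLowering::hasLoadLinkedStoreConditional() const { return true; }

Value *MyTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                        AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Hypothetical load-exclusive intrinsic returning the loaded value.
  Function *LdX = Intrinsic::getDeclaration(M, Intrinsic::mytarget_load_ex);
  return Builder.CreateCall(LdX, Addr);
}

Value *MyTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                              Value *Addr,
                                              AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Hypothetical store-exclusive intrinsic; returns 0 on success, as the
  // AtomicExpand pass expects.
  Function *StX = Intrinsic::getDeclaration(M, Intrinsic::mytarget_store_ex);
  return Builder.CreateCall2(StX, Val, Addr);
}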
- /// Return true if the given (atomic) instruction should be expanded by the
- /// IR-level AtomicExpandLoadLinked pass into a loop involving
- /// load-linked/store-conditional pairs. Atomic stores will be expanded in the
- /// same way as "atomic xchg" operations which ignore their output if needed.
- virtual bool shouldExpandAtomicInIR(Instruction *Inst) const {
+ /// Inserts a target-specific intrinsic specifying a fence into the IR.
+ /// It is called by AtomicExpandPass before expanding an
+ /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
+ /// RMW and CmpXchg set both IsStore and IsLoad to true.
+ /// This function should return either nullptr or a pointer to an IR-level
+ /// Instruction. Even complex fence sequences can be represented by a
+ /// single Instruction through an intrinsic to be lowered later.
+ /// Backends with !getInsertFencesForAtomic() should keep this a no-op.
+ /// Backends should override this method to produce a target-specific
+ /// intrinsic for their fences.
+ /// FIXME: The default implementation here, in terms of IR-level fences,
+ /// exists for historical/compatibility reasons and is *unsound*! Fences
+ /// cannot, in general, be used to restore sequential consistency. Consider
+ /// the following example:
+ /// atomic<int> x = y = 0;
+ /// int r1, r2, r3, r4;
+ /// Thread 0:
+ /// x.store(1);
+ /// Thread 1:
+ /// y.store(1);
+ /// Thread 2:
+ /// r1 = x.load();
+ /// r2 = y.load();
+ /// Thread 3:
+ /// r3 = y.load();
+ /// r4 = x.load();
+ /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
+ /// seq_cst. But if they are lowered to monotonic accesses, no amount of
+ /// IR-level fences can prevent it.
+ /// @{
+ virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ if (isAtLeastRelease(Ord) && IsStore)
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+ }
+
+ virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ if (isAtLeastAcquire(Ord))
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+ }
+ /// @}
+
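
A sketch of a target-specific override of the leading-fence hook; the barrier intrinsic is invented, but the structure mirrors what a backend using insertFencesForAtomic might do:

Instruction *MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                AtomicOrdering Ord,
                                                bool IsStore,
                                                bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return nullptr;
  if (!IsStore || !isAtLeastRelease(Ord))
    return nullptr;
  // Emit a hypothetical full-barrier intrinsic instead of an IR-level fence,
  // so later lowering can pick the cheapest barrier the ordering allows.
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  return Builder.CreateCall(
      Intrinsic::getDeclaration(M, Intrinsic::mytarget_barrier));
}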
+ /// Returns true if the given (atomic) store should be expanded by the
+ /// IR-level AtomicExpand pass into an "atomic xchg" whose output is ignored.
+ virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return false;
}
+ /// Returns true if the given (atomic) load should be expanded by the
+ /// IR-level AtomicExpand pass into a load-linked instruction
+ /// (through emitLoadLinked()).
+ virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
+
+ /// Returns true if the given AtomicRMW should be expanded by the
+ /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
+ virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
+ return false;
+ }
+ /// On some platforms, an AtomicRMW that never actually modifies the value
+ /// (such as fetch_add of 0) can be turned into a fence followed by an
+ /// atomic load. This may sound useless, but it makes it possible for the
+ /// processor to keep the cacheline shared, dramatically improving
+ /// performance. And such idempotent RMWs are useful for implementing some
+ /// kinds of locks, see for example (justification + benchmarks):
+ /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
+ /// This method tries doing that transformation, returning the atomic load if
+ /// it succeeds, and nullptr otherwise.
+ /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
+ /// another round of expansion.
+ virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
+ return nullptr;
+ }
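
For context, the source-level pattern lowerIdempotentRMWIntoFencedLoad is aimed at, as plain C++11 independent of any particular backend:

#include <atomic>

std::atomic<int> Guard(0);

int observeGuard() {
  // fetch_add(0) never changes the value; only its ordering matters. A
  // target implementing the hook can replace the RMW with a fence followed
  // by an atomic load, letting the cache line stay in the shared state.
  return Guard.fetch_add(0, std::memory_order_seq_cst);
}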
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
@@ -996,11 +1099,6 @@ protected:
UseUnderscoreLongJmp = Val;
}
- /// Indicate whether the target can generate code for jump tables.
- void setSupportJumpTables(bool Val) {
- SupportJumpTables = Val;
- }
-
/// Indicate the number of blocks to generate jump tables rather than if
/// sequence.
void setMinimumJumpTableEntries(int Val) {
@@ -1058,15 +1156,21 @@ protected:
/// possible, should be replaced by an alternate sequence of instructions not
/// containing an integer divide.
void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
+
+ /// Tells the code generator that this target supports floating point
+ /// exceptions and cares about preserving floating point exception behavior.
+ void setHasFloatingPointExceptions(bool FPExceptions = true) {
+ HasFloatingPointExceptions = FPExceptions;
+ }
/// Tells the code generator which bitwidths to bypass.
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
}
- /// Tells the code generator that it shouldn't generate srl/add/sra for a
- /// signed divide by power of two, and let the target handle it.
- void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; }
+ /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
+ /// signed divide by power of two; let the target handle it.
+ void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; }
/// Add the specified register class as an available regclass for the
/// specified value type. This indicates the selector can handle values of
@@ -1451,7 +1555,6 @@ public:
private:
const TargetMachine &TM;
const DataLayout *DL;
- const TargetLoweringObjectFile &TLOF;
/// True if this is a little endian target.
bool IsLittleEndian;
@@ -1485,15 +1588,19 @@ private:
/// div/rem when the operands are positive and less than 256.
DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
- /// Tells the code generator that it shouldn't generate srl/add/sra for a
- /// signed divide by power of two, and let the target handle it.
- bool Pow2DivIsCheap;
+ /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
+ /// signed divide by power of two; let the target handle it.
+ bool Pow2SDivIsCheap;
/// Tells the code generator that it shouldn't generate extra flow control
/// instructions and should attempt to combine flow control instructions via
/// predication.
bool JumpIsExpensive;
+ /// Whether the target supports or cares about preserving floating point
+ /// exception behavior.
+ bool HasFloatingPointExceptions;
+
/// This target prefers to use _setjmp to implement llvm.setjmp.
///
/// Defaults to false.
@@ -1504,10 +1611,6 @@ private:
/// Defaults to false.
bool UseUnderscoreLongJmp;
- /// Whether the target can generate code for jumptables. If it's not true,
- /// then each jumptable must be lowered into if-then-else's.
- bool SupportJumpTables;
-
/// Number of blocks threshold to use jump tables.
int MinimumJumpTableEntries;
@@ -1635,7 +1738,7 @@ public:
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert(
- (LA == TypeLegal ||
+ (LA == TypeLegal || LA == TypeSoftenFloat ||
ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)
&& "Promote may not follow Expand or Promote");
@@ -1861,9 +1964,8 @@ class TargetLowering : public TargetLoweringBase {
void operator=(const TargetLowering&) LLVM_DELETED_FUNCTION;
public:
- /// NOTE: The constructor takes ownership of TLOF.
- explicit TargetLowering(const TargetMachine &TM,
- const TargetLoweringObjectFile *TLOF);
+ /// NOTE: The TargetMachine owns TLOF.
+ explicit TargetLowering(const TargetMachine &TM);
/// Returns true by value, base pointer and offset pointer and addressing mode
/// by reference if the node's address can be legally represented as
@@ -2324,9 +2426,9 @@ public:
/// all the time, e.g. i1 on x86-64. It is also not necessary for non-C
/// calling conventions. The frontend should handle this and include all of
/// the necessary information.
- virtual MVT getTypeForExtArgOrReturn(MVT VT,
+ virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType /*ExtendKind*/) const {
- MVT MinVT = getRegisterType(MVT::i32);
+ EVT MinVT = getRegisterType(Context, MVT::i32);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -2474,11 +2576,10 @@ public:
unsigned getMatchedOperand() const;
/// Copy constructor for copying from a ConstraintInfo.
- AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
- : InlineAsm::ConstraintInfo(info),
- ConstraintType(TargetLowering::C_Unknown),
- CallOperandVal(nullptr), ConstraintVT(MVT::Other) {
- }
+ AsmOperandInfo(InlineAsm::ConstraintInfo Info)
+ : InlineAsm::ConstraintInfo(std::move(Info)),
+ ConstraintType(TargetLowering::C_Unknown), CallOperandVal(nullptr),
+ ConstraintVT(MVT::Other) {}
};
typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
@@ -2545,6 +2646,45 @@ public:
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
bool IsAfterLegalization,
std::vector<SDNode *> *Created) const;
+ virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ return SDValue();
+ }
+
+ /// Hooks for building estimates in place of slower divisions and square
+ /// roots.
+
+ /// Return a reciprocal square root estimate value for the input operand.
+ /// The RefinementSteps output is the number of Newton-Raphson refinement
+ /// iterations required to generate a sufficient (though not necessarily
+ /// IEEE-754 compliant) estimate for the value type.
+ /// The boolean UseOneConstNR output is used to select a Newton-Raphson
+ /// algorithm implementation that uses one constant or two constants.
+ /// A target may choose to implement its own refinement within this function.
+ /// If that's true, then return '0' as the number of RefinementSteps to avoid
+ /// any further refinement of the estimate.
+ /// An empty SDValue return means no estimate sequence can be created.
+ virtual SDValue getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ return SDValue();
+ }
+
+ /// Return a reciprocal estimate value for the input operand.
+ /// The RefinementSteps output is the number of Newton-Raphson refinement
+ /// iterations required to generate a sufficient (though not necessarily
+ /// IEEE-754 compliant) estimate for the value type.
+ /// A target may choose to implement its own refinement within this function.
+ /// If that's true, then return '0' as the number of RefinementSteps to avoid
+ /// any further refinement of the estimate.
+ /// An empty SDValue return means no estimate sequence can be created.
+ virtual SDValue getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ return SDValue();
+ }
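
For reference, one Newton-Raphson refinement step for each kind of estimate, written at the source level; the hooks themselves would emit the equivalent DAG nodes:

// Refine an estimate E of 1/sqrt(A):  E' = E * (1.5 - 0.5 * A * E * E).
float refineRsqrt(float A, float E) {
  return E * (1.5f - 0.5f * A * E * E);
}

// Refine an estimate E of 1/A:  E' = E * (2 - A * E).
float refineRecip(float A, float E) {
  return E * (2.0f - A * E);
}

// Each step roughly doubles the number of correct bits, which is why
// RefinementSteps scales with the precision required for the value type.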
//===--------------------------------------------------------------------===//
// Legalization utility functions
@@ -2564,6 +2704,12 @@ public:
SDValue LH = SDValue(), SDValue RL = SDValue(),
SDValue RH = SDValue()) const;
+ /// Expand a float (f32) to signed integer (i64) conversion (FP_TO_SINT).
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns true if the expansion was successful, false otherwise
+ bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
@@ -2583,6 +2729,12 @@ public:
/// ARM 's' setting instructions.
virtual void
AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+
+ /// If this function returns true, SelectionDAGBuilder emits a
+ /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
+ virtual bool useLoadStackGuardNode() const {
+ return false;
+ }
};
/// Given an LLVM IR type and return type attributes, compute the return value