aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r-- lib/CodeGen/SelectionDAG/Android.mk | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1719
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 62
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 4
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 150
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 89
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 11
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 38
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 197
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 28
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 224
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 435
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 36
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 128
-rw-r--r-- lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 679
-rw-r--r-- lib/CodeGen/SelectionDAG/StatepointLowering.h | 138
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 92
22 files changed, 3190 insertions, 861 deletions
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
index 0e52ee3..9501ad9 100644
--- a/lib/CodeGen/SelectionDAG/Android.mk
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -22,6 +22,7 @@ codegen_selectiondag_SRC_FILES := \
SelectionDAGDumper.cpp \
SelectionDAGISel.cpp \
SelectionDAGPrinter.cpp \
+ StatepointLowering.cpp \
TargetLowering.cpp \
TargetSelectionDAGInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 75e8167..fbedf2c 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMSelectionDAG
SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ StatepointLowering.cpp
ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a1291ed..6129401 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -303,6 +303,8 @@ namespace {
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -325,6 +327,7 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -361,6 +364,28 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Pointer to the memory node.
+ LSBaseSDNode *MemNode;
+ // Offset of this memory node from the base pointer.
+ int64_t OffsetFromBase;
+ // Sequence number of this memory node within the chain; the lowest
+ // memory operand in the DAG starts at zero.
+ unsigned SequenceNum;
+ };
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumElem,
+ bool IsConstantSrc, bool UseVector);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -378,12 +403,9 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ auto *F = DAG.getMachineFunction().getFunction();
+ ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the work list
@@ -444,7 +466,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
@@ -736,10 +758,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
}
if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
@@ -757,10 +778,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return SDValue();
}
if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
@@ -785,11 +805,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
N->dump(&DAG);
dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
- dbgs() << " and " << NumTo-1 << " other values\n";
- for (unsigned i = 0, e = NumTo; i != e; ++i)
- assert((!To[i].getNode() ||
- N->getValueType(i) == To[i].getValueType()) &&
- "Cannot combine value to value of different type!"));
+ dbgs() << " and " << NumTo-1 << " other values\n");
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!");
+
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
@@ -874,8 +895,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
@@ -1096,8 +1117,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
@@ -1160,10 +1181,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Early exit if this basic block is in an optnone function.
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::OptimizeNone))
return;
// Add all the dag nodes to the worklist.
@@ -1351,6 +1370,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSTORE: return visitMSTORE(N);
}
return SDValue();
}
@@ -1475,7 +1496,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
- // rededundant.
+ // redundant.
Changed = true;
break;
@@ -1504,7 +1525,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
SDValue Result;
- // If we've change things around then replace token factor.
+ // If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
@@ -1514,8 +1535,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
- // Don't add users to work list.
- return CombineTo(N, Result, false);
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
@@ -1541,8 +1565,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
@@ -1563,6 +1585,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -1714,8 +1738,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
@@ -1725,6 +1747,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
@@ -1756,10 +1780,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
@@ -1786,10 +1810,6 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
@@ -1807,6 +1827,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
@@ -1826,6 +1848,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
@@ -1890,8 +1914,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
@@ -1907,6 +1929,8 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
@@ -2055,8 +2079,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2066,6 +2088,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
@@ -2145,8 +2169,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2156,6 +2178,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
@@ -2197,11 +2221,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
@@ -2239,11 +2263,11 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
@@ -2522,6 +2546,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// do not sink logical op inside of a vector extend, since it may combine
@@ -2529,6 +2554,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
EVT Op0VT = N0.getOperand(0).getValueType();
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::BSWAP ||
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
@@ -2662,11 +2688,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
@@ -2698,6 +2720,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -2707,6 +2731,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && N1C->isAllOnesValue())
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
@@ -2793,6 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
Load->getMemoryVT());
// Resize the constant to the same size as the original memory access before
@@ -2838,6 +2864,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
@@ -2919,7 +2946,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -2939,7 +2966,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -2965,10 +2992,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LN0->getMemoryVT();
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
+ ExtVT))) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
@@ -2983,7 +3011,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
+ ExtVT))) {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
@@ -3003,7 +3032,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(NewPtr.getNode());
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), NewPtr,
@@ -3313,9 +3341,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
@@ -3407,6 +3432,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
// fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3440,15 +3467,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
- if (!COR.getNode())
- return SDValue();
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1), COR);
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ return DAG.getNode(
+ ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
+ return SDValue();
}
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
@@ -3521,6 +3548,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
+ }
+
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
@@ -3790,9 +3828,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
@@ -3816,6 +3851,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3830,6 +3867,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -4039,12 +4077,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4061,8 +4098,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
- if (C.getNode())
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
@@ -4072,6 +4108,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl c1, c2) -> c1<<c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
// fold (shl 0, x) -> 0
@@ -4220,12 +4257,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4234,6 +4270,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra c1, c2) -> (sra c1, c2)
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
// fold (sra 0, x) -> 0
@@ -4366,12 +4403,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
@@ -4380,6 +4416,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl c1, c2) -> c1 >>u c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
// fold (srl 0, x) -> 0
@@ -4608,13 +4645,47 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
+
+/// \brief Build an FMINNUM/FMAXNUM node for a select whose True/False values are the compared LHS/RHS; returns an empty SDValue if the pattern, CC or target legality rules it out.
+static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+ SDValue True, SDValue False,
+ ISD::CondCode CC, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue(); // The selected values are not the compared operands (in either order).
+
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: { // All "less than" / "less or equal" condition codes.
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; // (x < y) ? x : y is a min; (x < y) ? y : x is a max.
+ if (TLI.isOperationLegal(Opcode, VT)) // Only emit the node when the target reports it legal for VT.
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: { // All "greater than" / "greater or equal" condition codes.
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; // (x > y) ? x : y is a max; (x > y) ? y : x is a min.
+ if (TLI.isOperationLegal(Opcode, VT)) // Only emit the node when the target reports it legal for VT.
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default: // Any other condition code (e.g. equality) has no min/max form here.
+ return SDValue();
+ }
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
@@ -4622,12 +4693,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N1 == N2)
return N1;
// fold (select true, X, Y) -> X
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && !N0C->isNullValue())
return N1;
// fold (select false, X, Y) -> Y
if (N0C && N0C->isNullValue())
return N2;
// fold (select C, 1, X) -> (or C, X)
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
@@ -4639,6 +4712,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (VT.isInteger() &&
(VT0 == MVT::i1 || (VT0.isInteger() &&
TLI.getBooleanContents(false, false) ==
@@ -4687,6 +4761,28 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ SDValue FMinMax =
+ combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
+ N1, N2, CC, TLI, DAG);
+ if (FMinMax)
+ return FMinMax;
+ }
+
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
TLI.isOperationLegal(ISD::SELECT_CC, VT))
@@ -4771,6 +4867,166 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+// Combine a masked store (MSTORE) before type legalization.
+//
+// When the stored data type has to be split by the type legalizer and the
+// mask is produced by a SETCC, the mask, the data and the store itself are
+// split in half here. Returns a TokenFactor joining the two half stores, or
+// an empty SDValue when nothing was changed.
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+  // Nothing to do once types have been legalized.
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
+  // The node is dereferenced unconditionally below, so use the asserting
+  // cast<> instead of dyn_cast<>, which would yield null on a mismatch.
+  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+  SDValue Mask = MST->getMask();
+  SDValue Data = MST->getValue();
+  EVT DataVT = Data.getValueType();
+  SDLoc DL(N);
+
+  // If the MSTORE data type requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+  if (Mask.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Check if any splitting is required.
+  if (TLI.getTypeAction(*DAG.getContext(), DataVT) !=
+      TargetLowering::TypeSplitVector)
+    return SDValue();
+
+  SDValue MaskLo, MaskHi;
+  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+  SDValue Chain = MST->getChain();
+  SDValue Ptr = MST->getBasePtr();
+
+  EVT MemoryVT = MST->getMemoryVT();
+  unsigned Alignment = MST->getOriginalAlignment();
+
+  // If the alignment is equal to the size of the stored vector, the second
+  // half is only guaranteed half of that alignment. Use the type of the stored
+  // value itself rather than result 0 of the node that defines it.
+  unsigned SecondHalfAlignment =
+      (Alignment == DataVT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+  SDValue DataLo, DataHi;
+  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+  // Low half: stored at the original address.
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MST->getPointerInfo(), MachineMemOperand::MOStore,
+      LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges());
+
+  SDValue Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+                                  MST->isTruncatingStore());
+
+  // High half: stored right after the low half.
+  unsigned IncrementSize = LoMemVT.getSizeInBits() / 8;
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+  // The memory operand has to describe the advanced address as well, so
+  // offset the pointer info by the same amount as the pointer.
+  MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MST->getPointerInfo().getWithOffset(IncrementSize),
+      MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+      MST->getAAInfo(), MST->getRanges());
+
+  SDValue Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+                                  MST->isTruncatingStore());
+
+  AddToWorklist(Lo.getNode());
+  AddToWorklist(Hi.getNode());
+
+  // Both halves hang off the original chain; join their output chains.
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+
+// Combine a masked load (MLOAD) before type legalization.
+//
+// When the loaded type has to be split by the type legalizer and the mask is
+// produced by a SETCC, the mask, the pass-through value and the load itself
+// are split in half here. Returns the merged {value, chain} pair of the
+// replacement, or an empty SDValue when nothing was changed.
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+  // Nothing to do once types have been legalized.
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
+  // The node is dereferenced unconditionally below, so use the asserting
+  // cast<> instead of dyn_cast<>, which would yield null on a mismatch.
+  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+  SDValue Mask = MLD->getMask();
+  SDLoc DL(N);
+
+  // If the MLOAD result requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+  if (Mask.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  // Check if any splitting is required.
+  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+      TargetLowering::TypeSplitVector)
+    return SDValue();
+
+  SDValue MaskLo, MaskHi;
+  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+  SDValue Src0 = MLD->getSrc0();
+  SDValue Src0Lo, Src0Hi;
+  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+  SDValue Chain = MLD->getChain();
+  SDValue Ptr = MLD->getBasePtr();
+  EVT MemoryVT = MLD->getMemoryVT();
+  unsigned Alignment = MLD->getOriginalAlignment();
+
+  // If the alignment is equal to the vector size, the second half is only
+  // guaranteed half of that alignment.
+  unsigned SecondHalfAlignment =
+      (Alignment == MLD->getValueType(0).getSizeInBits() / 8) ? Alignment / 2
+                                                              : Alignment;
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+  // Low half: loaded from the original address.
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+      Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+  SDValue Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT,
+                                 MMO, ISD::NON_EXTLOAD);
+
+  // High half: loaded from right after the low half.
+  unsigned IncrementSize = LoMemVT.getSizeInBits() / 8;
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+  // The memory operand has to describe the advanced address as well, so
+  // offset the pointer info by the same amount as the pointer.
+  MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MLD->getPointerInfo().getWithOffset(IncrementSize),
+      MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+      MLD->getAAInfo(), MLD->getRanges());
+
+  SDValue Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT,
+                                 MMO, ISD::NON_EXTLOAD);
+
+  AddToWorklist(Lo.getNode());
+  AddToWorklist(Hi.getNode());
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+                      Hi.getValue(1));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
+
+  // Reassemble the full-width result from the two halves.
+  SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+  SDValue RetOps[] = { LoadRes, Chain };
+  return DAG.getMergeValues(RetOps, DL);
+}
+
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4880,13 +5136,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
- }
-
- // Fold to a simpler select_cc
- if (SCC.getOpcode() == ISD::SETCC)
+ } else if (SCC->getOpcode() == ISD::UNDEF) {
+ // When the condition is UNDEF, just return the first operand. This is
+ // coherent with the DAG creation: no setcc node is created in this case.
+ return N2;
+ } else if (SCC.getOpcode() == ISD::SETCC) {
+ // Fold to a simpler select_cc
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
SCC.getOperand(0), SCC.getOperand(1), N2, N3,
SCC.getOperand(2));
+ }
}
// If we can fold this based on the true/false value, do so.
@@ -5047,6 +5306,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
}
}
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+//
+// Turn a sign/zero extend of a vector load into several narrower extending
+// loads whose results are concatenated. N must be a SIGN_EXTEND or
+// ZERO_EXTEND node. Returns SDValue(N, 0) when the rewrite was performed and
+// an empty SDValue otherwise.
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  EVT SrcVT = N0.getValueType();
+
+  const unsigned ExtOpc = N->getOpcode();
+  assert((ExtOpc == ISD::SIGN_EXTEND || ExtOpc == ISD::ZERO_EXTEND) &&
+         "Unexpected node type (not an extend)!");
+
+  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+  // For example, on a target with legal v4i32, but illegal v8i32, turn:
+  //   (v8i32 (sext (v8i16 (load x))))
+  // into:
+  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
+  //                          (v4i32 (sextload (x + 8)))))
+  // (each split load reads a v4i16, i.e. the pointer advances by the store
+  // size of the split source type). Uses of the original load, i.e.:
+  //   (v8i16 (load x))
+  // are replaced with a truncate of that concat_vectors back to v8i16.
+  //
+  // This combine is only applicable to illegal, but splittable, vectors.
+  // All legal types, and illegal non-vector types, are handled elsewhere.
+  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+  if (N0->getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  LoadSDNode *Load = cast<LoadSDNode>(N0);
+
+  // Only plain, unindexed, non-volatile loads whose value has a single use.
+  if (!ISD::isNON_EXTLoad(Load) || !ISD::isUNINDEXEDLoad(Load))
+    return SDValue();
+  if (!N0.hasOneUse() || Load->isVolatile())
+    return SDValue();
+  // Only power-of-two vectors that the target wants to see as extloads.
+  if (!DstVT.isVector() || !DstVT.isPow2VectorType())
+    return SDValue();
+  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+    return SDValue();
+
+  SmallVector<SDNode *, 4> SetCCs;
+  if (!ExtendUsesToFormExtLoad(N, N0, ExtOpc, SetCCs, TLI))
+    return SDValue();
+
+  ISD::LoadExtType ExtType = ISD::ZEXTLOAD;
+  if (ExtOpc == ISD::SIGN_EXTEND)
+    ExtType = ISD::SEXTLOAD;
+
+  // Try to split the vector types to get down to legal types.
+  EVT SplitSrcVT = SrcVT;
+  EVT SplitDstVT = DstVT;
+  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+         SplitSrcVT.getVectorNumElements() > 1) {
+    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+  }
+
+  // Give up if even the narrowest split is not a supported extending load.
+  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+    return SDValue();
+
+  SDLoc DL(N);
+  const unsigned NumSplits =
+      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+  const unsigned Stride = SplitSrcVT.getStoreSize();
+  SmallVector<SDValue, 4> Loads;
+  SmallVector<SDValue, 4> Chains;
+
+  // Emit one extending load per piece, advancing the address by Stride bytes
+  // after each one.
+  SDValue Addr = Load->getBasePtr();
+  unsigned Offset = 0;
+  for (unsigned Idx = 0; Idx != NumSplits; ++Idx, Offset += Stride) {
+    const unsigned PieceAlign = MinAlign(Load->getAlignment(), Offset);
+
+    SDValue Piece = DAG.getExtLoad(
+        ExtType, DL, SplitDstVT, Load->getChain(), Addr,
+        Load->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
+        Load->isVolatile(), Load->isNonTemporal(), Load->isInvariant(),
+        PieceAlign, Load->getAAInfo());
+
+    Addr = DAG.getNode(ISD::ADD, DL, Addr.getValueType(), Addr,
+                       DAG.getConstant(Stride, Addr.getValueType()));
+
+    Loads.push_back(Piece.getValue(0));
+    Chains.push_back(Piece.getValue(1));
+  }
+
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+  CombineTo(N, NewValue);
+
+  // Replace uses of the original load (before extension)
+  // with a truncate of the concatenated extloaded vectors.
+  SDValue Trunc =
+      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+  CombineTo(N0.getNode(), Trunc, NewChain);
+  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
+                  (ISD::NodeType)N->getOpcode());
+  return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5113,17 +5468,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
- // None of the supported targets knows how to perform load and sign extend
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -5140,6 +5496,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -5147,7 +5508,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
@@ -5167,7 +5528,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
@@ -5403,17 +5764,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- // None of the supported targets knows how to perform load and vector_zext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
@@ -5431,13 +5793,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
@@ -5474,7 +5841,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
@@ -5636,7 +6003,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5666,7 +6033,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
- if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -5795,7 +6162,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
- if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -5874,6 +6241,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
return SDValue();
+ if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
+ return SDValue();
+
EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
@@ -5999,7 +6369,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -6015,7 +6385,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -6318,19 +6688,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
- if (Res.getNode() != N) {
- if (!LegalOperations ||
- TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
- return Res;
-
- // Folding it resulted in an illegal node, and it's too late to
- // do that. Clean up the old node and forego the transformation.
- // Ideally this won't happen very often, because instcombine
- // and the earlier dagcombine runs (where illegal nodes are
- // permitted) should have folded most of them already.
- deleteAndRecombine(Res.getNode());
- }
+ // If we can't allow illegal operations, we need to check that this is just
+ // a fp -> int or int -> fp conversion and that the resulting operation will
+ // be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT)))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
}
// (conv (conv x, t1), t2) -> (conv x, t2)
@@ -6489,7 +6855,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (SrcEltVT.isFloatingPoint()) {
// Convert the input float vector to a int vector where the elements are the
// same sizes.
- assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
@@ -6498,7 +6863,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Now we know the input is an integer vector. If the output is a FP type,
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
- assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
@@ -6549,8 +6913,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
- for (unsigned j = 0; j != NumOutputsPerInput; ++j)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
@@ -6575,6 +6938,133 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
+// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad.
+//
+// FusedOpcode is the opcode of the fused node to create. Aggressive allows
+// fusing a multiply that has other users and enables the folds that look
+// through an already-fused operand. TLI is currently unused. Returns the fused
+// node, or an empty SDValue if no pattern matched.
+static SDValue performFaddFmulCombines(unsigned FusedOpcode,
+                                       bool Aggressive,
+                                       SDNode *N,
+                                       const TargetLowering &TLI,
+                                       SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  SDLoc SL(N);
+
+  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+  if (N0.getOpcode() == ISD::FMUL &&
+      (Aggressive || N0->hasOneUse())) {
+    return DAG.getNode(FusedOpcode, SL, VT,
+                       N0.getOperand(0), N0.getOperand(1), N1);
+  }
+
+  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+  // Note: Commutes FADD operands.
+  if (N1.getOpcode() == ISD::FMUL &&
+      (Aggressive || N1->hasOneUse())) {
+    return DAG.getNode(FusedOpcode, SL, VT,
+                       N1.getOperand(0), N1.getOperand(1), N0);
+  }
+
+  // More folding opportunities when target permits.
+  //
+  // The operand looked through must be of the fused kind being formed (as in
+  // performFsubFmulCombines) rather than a hard-coded ISD::FMA; otherwise an
+  // FMAD combine would miss FMAD operands and would re-emit an existing FMA
+  // as FMAD.
+  if (Aggressive) {
+    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+    if (N0.getOpcode() == FusedOpcode &&
+        N0.getOperand(2).getOpcode() == ISD::FMUL) {
+      return DAG.getNode(FusedOpcode, SL, VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         DAG.getNode(FusedOpcode, SL, VT,
+                                     N0.getOperand(2).getOperand(0),
+                                     N0.getOperand(2).getOperand(1),
+                                     N1));
+    }
+
+    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z (fma u, v, x))
+    if (N1.getOpcode() == FusedOpcode &&
+        N1.getOperand(2).getOpcode() == ISD::FMUL) {
+      return DAG.getNode(FusedOpcode, SL, VT,
+                         N1.getOperand(0), N1.getOperand(1),
+                         DAG.getNode(FusedOpcode, SL, VT,
+                                     N1.getOperand(2).getOperand(0),
+                                     N1.getOperand(2).getOperand(1),
+                                     N0));
+    }
+  }
+
+  return SDValue();
+}
+
+// Attempt different variants of (fsub (fmul a, b), c) -> fma or fmad.
+//
+// FusedOpcode is the opcode of the fused node to create. Aggressive allows
+// fusing a multiply that has other users and enables the folds that look
+// through an already-fused operand. Returns the fused node, or an empty
+// SDValue if no pattern matched.
+static SDValue performFsubFmulCombines(unsigned FusedOpcode,
+                                       bool Aggressive,
+                                       SDNode *N,
+                                       const TargetLowering &TLI,
+                                       SelectionDAG &DAG) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  SDLoc DL(N);
+
+  // Build (fneg V) in the result type.
+  auto Negate = [&](SDValue V) {
+    return DAG.getNode(ISD::FNEG, DL, VT, V);
+  };
+
+  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+  if (LHS.getOpcode() == ISD::FMUL && (Aggressive || LHS->hasOneUse())) {
+    SDValue NegZ = Negate(RHS);
+    return DAG.getNode(FusedOpcode, DL, VT,
+                       LHS.getOperand(0), LHS.getOperand(1), NegZ);
+  }
+
+  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+  // Note: Commutes FSUB operands.
+  if (RHS.getOpcode() == ISD::FMUL && (Aggressive || RHS->hasOneUse())) {
+    SDValue NegY = Negate(RHS.getOperand(0));
+    return DAG.getNode(FusedOpcode, DL, VT, NegY, RHS.getOperand(1), LHS);
+  }
+
+  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+  if (LHS.getOpcode() == ISD::FNEG &&
+      LHS.getOperand(0).getOpcode() == ISD::FMUL &&
+      (Aggressive || (LHS->hasOneUse() && LHS.getOperand(0).hasOneUse()))) {
+    SDValue Mul = LHS.getOperand(0);
+    SDValue X = Mul.getOperand(0);
+    SDValue Y = Mul.getOperand(1);
+    SDValue NegX = Negate(X);
+    SDValue NegZ = Negate(RHS);
+    return DAG.getNode(FusedOpcode, DL, VT, NegX, Y, NegZ);
+  }
+
+  // Everything below needs the target to permit aggressive fusion.
+  if (!Aggressive)
+    return SDValue();
+
+  // fold (fsub (fma x, y, (fmul u, v)), z)
+  //   -> (fma x, y (fma u, v, (fneg z)))
+  if (LHS.getOpcode() == FusedOpcode &&
+      LHS.getOperand(2).getOpcode() == ISD::FMUL) {
+    SDValue Mul = LHS.getOperand(2);
+    SDValue NegZ = Negate(RHS);
+    SDValue Inner = DAG.getNode(FusedOpcode, DL, VT,
+                                Mul.getOperand(0), Mul.getOperand(1), NegZ);
+    return DAG.getNode(FusedOpcode, DL, VT,
+                       LHS.getOperand(0), LHS.getOperand(1), Inner);
+  }
+
+  // fold (fsub x, (fma y, z, (fmul u, v)))
+  //   -> (fma (fneg y), z, (fma (fneg u), v, x))
+  if (RHS.getOpcode() == FusedOpcode &&
+      RHS.getOperand(2).getOpcode() == ISD::FMUL) {
+    SDValue U = RHS.getOperand(2).getOperand(0);
+    SDValue V = RHS.getOperand(2).getOperand(1);
+    SDValue NegY = Negate(RHS.getOperand(0));
+    SDValue NegU = Negate(U);
+    SDValue Inner = DAG.getNode(FusedOpcode, DL, VT, NegU, V, LHS);
+    return DAG.getNode(FusedOpcode, DL, VT, NegY, RHS.getOperand(1), Inner);
+  }
+
+  return SDValue();
+}
+
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6714,23 +7204,55 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
} // enable-unsafe-fp-math
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+ if (SDValue Fused
+ = performFaddFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(1)), N0);
+ }
+ }
}
return SDValue();
@@ -6792,37 +7314,95 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+
+ if (SDValue Fused
+ = performFsubFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
+
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul, x, y))), z)
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N000.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N000.getOperand(1)),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul, x, y))), z)
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ VT, N000.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
+ N000.getOperand(1)),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+ }
}
}
@@ -7104,6 +7684,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
}
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal.
+ // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+ // Notice that this is not always beneficial. One reason is different target
+ // may have different costs for FDIV and FMUL, so sometimes the cost of two
+ // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
+ // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
+ if (Options.UnsafeFPMath) {
+ // Skip if current node is a reciprocal.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> Users;
+ // Find all FDIV users of the same divisor.
+ for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
+ UE = N1.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = UI.getUse().getUser();
+ if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
+ Users.push_back(User);
+ }
+
+ if (TLI.combineRepeatedFPDivisors(Users.size())) {
+ SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
+ if ((*I)->getOperand(0) != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
+ (*I)->getOperand(0), Reciprocal);
+ DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+ }
+ }
+ return SDValue();
+ }
+ }
+
return SDValue();
}
@@ -7122,7 +7740,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ !TLI.isFsqrtCheap()) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
@@ -7198,11 +7817,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
@@ -7251,11 +7870,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
@@ -7289,6 +7908,50 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return SDValue();
}
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ if (SrcVT == VT)
+ return Src;
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7298,7 +7961,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
if (N0CFP)
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
@@ -7310,7 +7973,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
@@ -7329,11 +7992,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
- // This is a value preserving truncation if both round's are.
- bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
- N0.getNode()->getConstantOperandVal(1) == 1;
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(IsTrunc));
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ // If the first fp_round isn't a value preserving truncation, it might
+ // introduce a tie in the second fp_round, that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc));
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
@@ -7391,7 +8059,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -8923,7 +9591,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
@@ -9070,9 +9738,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
- !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
- TLI.isNarrowingProfitable(VT, NewVT))) {
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
@@ -9272,36 +9943,139 @@ struct BaseIndexOffset {
}
};
-/// Holds a pointer to an LSBaseSDNode as well as information on where it
-/// is located in a sequence of memory operations connected by a chain.
-struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
- // Ptr to the mem node.
- LSBaseSDNode *MemNode;
- // Offset from the base ptr.
- int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
-};
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned EarliestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ if (IsConstantSrc) {
+ // A vector store with a constant source implies that the constant is
+ // zero; we only handle merging stores of constant zeros because the zero
+ // can be materialized without a load.
+ // It may be beneficial to loosen this restriction to allow non-zero
+ // store merging.
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Materialize the combined value as one StoreBW-bit integer constant.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the earliest store (EarliestOp) with the new wide store.
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+
+ return true;
+}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@@ -9443,7 +10217,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@@ -9492,85 +10266,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
- // Make sure we have something to merge.
- if (NumElem < 2)
- return false;
-
- unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
- }
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
- SDLoc DL(StoreNodes[0].MemNode);
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a vector.
+ // It should be possible to handle mixed sources, but load sources need
+ // more careful handling (see the block of code below that handles
+ // consecutive loads).
+ if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
- SDValue StoredVal;
- if (UseVector) {
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
- StoredVal = DAG.getConstant(0, Ty);
- } else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
-
- // Construct a single integer constant which is made of the smaller
- // constant inputs.
- bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- SDValue Val = St->getValue();
- StoreInt<<=ElementSizeBytes*8;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt|=C->getAPIntValue().zext(StoreBW);
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
- } else {
- assert(false && "Invalid constant element type");
- }
- }
-
- // Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
- }
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- false, false,
- FirstInChain->getAlignment());
-
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ NumElem = i + 1;
}
- return true;
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ false, true);
}
// Below we handle the case of multiple consecutive stores that
@@ -9668,9 +10390,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
LastLegalIntegerType = i+1;
}
}
@@ -10108,7 +10830,8 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
+ VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
Load = DAG.getExtLoad(
@@ -10474,6 +11197,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
@@ -10519,26 +11247,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
SDValue VecIn1, VecIn2;
+ bool UsesZeroVector = false;
for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue Op = N->getOperand(i);
// Ignore undef inputs.
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+
+ // See if we can combine this build_vector into a blend with a zero vector.
+ if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
+ (Op.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ UsesZeroVector = true;
+ continue;
+ }
// If this input is something other than a EXTRACT_VECTOR_ELT with a
// constant index, bail out.
- if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1))) {
VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
// We allow up to two distinct input vectors.
- SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ SDValue ExtractedFromVec = Op.getOperand(0);
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
if (!VecIn1.getNode()) {
VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode()) {
+ } else if (!VecIn2.getNode() && !UsesZeroVector) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
@@ -10549,55 +11288,93 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
+ unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ unsigned Opcode = N->getOperand(i).getOpcode();
+ if (Opcode == ISD::UNDEF) {
Mask.push_back(-1);
continue;
}
+ // Operands can also be zero.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
+ assert(UsesZeroVector &&
+ (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
+ "Unexpected node found!");
+ Mask.push_back(NumInScalars+i);
+ continue;
+ }
+
// If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1);
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (Extract.getOperand(0) == VecIn1) {
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- if (ExtIndex > VT.getVectorNumElements())
- return SDValue();
-
Mask.push_back(ExtIndex);
continue;
}
- // Otherwise, use InIdx + VecSize
- unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- Mask.push_back(Idx+NumInScalars);
+ // Otherwise, use InIdx + InputVecSize
+ Mask.push_back(InNumElements + ExtIndex);
}
+ // Avoid introducing illegal shuffles with zero.
+ if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ return SDValue();
+
// We can't generate a shuffle node with mismatched input and output types.
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
- // We don't support shuffeling between TWO values of different types.
- if (VecIn2.getNode())
+ // If the input vector type has a different base type to the output
+ // vector type, bail out.
+ EVT VTElemType = VT.getVectorElementType();
+ if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+ (VecIn2.getNode() &&
+ (VecIn2.getValueType().getVectorElementType() != VTElemType)))
return SDValue();
+ // If the input vector is too small, widen it.
// We only support widening of vectors which are half the size of the
// output registers. For example XMM->YMM widening on X86 with AVX.
- if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
- return SDValue();
+ EVT VecInT = VecIn1.getValueType();
+ if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
+ // If we only have one small input, widen it by adding undef values.
+ if (!VecIn2.getNode())
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+ DAG.getUNDEF(VecIn1.getValueType()));
+ else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+ // If we have two small inputs of the same type, try to concat them.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+ VecIn2 = SDValue(nullptr, 0);
+ } else
+ return SDValue();
+ } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
+ // If the input vector is too large, try to split it.
+ // We don't support having two input vectors that are too large.
+ if (VecIn2.getNode())
+ return SDValue();
- // If the input vector type has a different base type to the output
- // vector type, bail out.
- if (VecIn1.getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
+ if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
+ return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // Try to replace VecIn1 with two extract_subvectors
+ // No need to update the masks, they should still be correct.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ UsesZeroVector = false;
+ } else
+ return SDValue();
}
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ if (UsesZeroVector)
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
+ DAG.getConstantFP(0.0, VT);
+ else
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
// Check that we were able to transform all incoming values to the same
// type.
@@ -10656,36 +11433,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If BUILD_VECTOR are from built from integer, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
}
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
@@ -10881,7 +11678,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
-// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into a simpler shuffle then a concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -10895,6 +11693,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+ // half vector elements.
+ if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+ std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+ SVN->getMask().end(), [](int i) { return i == -1; })) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+ ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
@@ -10993,7 +11803,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If it is a splat, check if the argument vector is another splat or a
- // build_vector with all scalar elements the same.
+ // build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -11030,6 +11840,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
@@ -11050,121 +11872,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
- // If this shuffle node is simply a swizzle of another shuffle node,
- // then try to simplify it.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N1.getOpcode() == ISD::UNDEF) {
-
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SmallVector<int, 4> Mask;
- // Compute the combined shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- assert(Idx < (int)NumElts && "Index references undef operand");
- // Next, this index comes from the first value, which is the incoming
- // shuffle. Adopt the incoming index.
- if (Idx >= 0)
- Idx = OtherSV->getMaskElt(Idx);
- Mask.push_back(Idx);
- }
-
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- bool isUndefMask = true;
- for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
- isUndefMask &= Mask[i] < 0;
-
- if (isUndefMask)
- return DAG.getUNDEF(VT);
-
- bool CommuteOperands = false;
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
- // To be valid, the combine shuffle mask should only reference elements
- // from one of the two vectors in input to the inner shufflevector.
- bool IsValidMask = true;
- for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
- // See if the combined mask only reference undefs or elements coming
- // from the first shufflevector operand.
- IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
-
- if (!IsValidMask) {
- IsValidMask = true;
- for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
- // Check that all the elements come from the second shuffle operand.
- IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
- CommuteOperands = IsValidMask;
- }
-
- // Early exit if the combined shuffle mask is not valid.
- if (!IsValidMask)
- return SDValue();
- }
-
- // See if this pair of shuffles can be safely folded according to either
- // of the following rules:
- // shuffle(shuffle(x, y), undef) -> x
- // shuffle(shuffle(x, undef), undef) -> x
- // shuffle(shuffle(x, y), undef) -> y
- bool IsIdentityMask = true;
- unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
- for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
- // Skip Undefs.
- if (Mask[i] < 0)
- continue;
-
- // The combined shuffle must map each index to itself.
- IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
- }
-
- if (IsIdentityMask) {
- if (CommuteOperands)
- // optimize shuffle(shuffle(x, y), undef) -> y.
- return OtherSV->getOperand(1);
-
- // optimize shuffle(shuffle(x, undef), undef) -> x
- // optimize shuffle(shuffle(x, y), undef) -> x
- return OtherSV->getOperand(0);
- }
-
- // It may still be beneficial to combine the two shuffles if the
- // resulting shuffle is legal.
- if (TLI.isTypeLegal(VT)) {
- if (!CommuteOperands) {
- if (TLI.isShuffleMaskLegal(Mask, VT))
- // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
- &Mask[0]);
- } else {
- // Compute the commuted shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
-
- if (TLI.isShuffleMaskLegal(Mask, VT))
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
- &Mask[0]);
- }
- }
- }
-
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
TLI.isTypeLegal(VT)) {
// The incoming shuffle must be of the same type as the result of the
@@ -11183,13 +11895,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// The incoming shuffle must be of the same type as the result of the
@@ -11197,14 +11909,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
- SDValue SV0 = OtherSV->getOperand(0);
- SDValue SV1 = OtherSV->getOperand(1);
- bool HasSameOp0 = N1 == SV0;
- bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
- if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
- // Early exit.
- return SDValue();
-
+ SDValue SV0, SV1;
SmallVector<int, 4> Mask;
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
@@ -11216,14 +11921,49 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
continue;
}
+ SDValue CurrentVec;
if (Idx < (int)NumElts) {
+ // This shuffle index refers to the inner shuffle N0. Lookup the inner
+ // shuffle mask to identify which vector is actually referenced.
Idx = OtherSV->getMaskElt(Idx);
- if (IsSV1Undef && Idx >= (int) NumElts)
- Idx = -1; // Propagate Undef.
- } else
- Idx = HasSameOp0 ? Idx - NumElts : Idx;
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+ : OtherSV->getOperand(1);
+ } else {
+ // This shuffle index references an element within N1.
+ CurrentVec = N1;
+ }
+
+ // Simple case where 'CurrentVec' is UNDEF.
+ if (CurrentVec.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Canonicalize the shuffle index. We don't know yet if CurrentVec
+ // will be the first or second operand of the combined shuffle.
+ Idx = Idx % NumElts;
+ if (!SV0.getNode() || SV0 == CurrentVec) {
+ // Ok. CurrentVec is the left hand side.
+ // Update the mask accordingly.
+ SV0 = CurrentVec;
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ if (SV1.getNode() && SV1 != CurrentVec)
+ return SDValue();
- Mask.push_back(Idx);
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
}
// Check if all indices in Mask are Undef. If so, propagate Undef.
@@ -11234,34 +11974,37 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (isUndefMask)
return DAG.getUNDEF(VT);
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
+
// Avoid introducing shuffles with illegal mask.
- if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (IsSV1Undef)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
- }
+ if (!TLI.isShuffleMaskLegal(Mask, VT)) {
+ // Compute the commuted shuffle mask and test again.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
- // Compute the commuted shuffle mask.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
- if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (IsSV1Undef)
- // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
- // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ std::swap(SV0, SV1);
}
+
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
}
return SDValue();
@@ -11322,9 +12065,11 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
- // Let's see if the target supports this vector_shuffle.
+ // Let's see if the target supports this vector_shuffle and make sure
+ // we're not running after operation legalization where it may have
+ // custom lowered the vector shuffles.
EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8facbc2..1df4a1d 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,12 +40,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -62,7 +63,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -728,6 +728,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
CallLoweringInfo CLI;
+ CLI.setIsPatchPoint();
if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
return false;
@@ -1579,7 +1580,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()),
+ TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 86b9542..7e72dc6 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -133,16 +133,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
ImmutableCallSite CS(I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(CS);
+ TLI->ParseConstraints(TRI, CS);
for (size_t I = 0, E = Ops.size(); I != E; ++I) {
TargetLowering::AsmOperandInfo &Op = Ops[I];
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -273,6 +274,7 @@ void FunctionLoweringInfo::clear() {
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ StatepointStackSlots.clear();
PreferredExtendType.clear();
}
@@ -470,60 +472,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
}
}
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
- MachineBasicBlock *MBB) {
- // Inform the MachineModuleInfo of the personality for this landing pad.
- const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
- assert(CE->getOpcode() == Instruction::BitCast &&
- isa<Function>(CE->getOperand(0)) &&
- "Personality should be a function");
- MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
-
- // Gather all the type infos for this landing pad and pass them along to
- // MachineModuleInfo.
- std::vector<const GlobalValue *> TyInfo;
- unsigned N = I.getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N) {
- TyInfo.reserve(N - FirstCatch);
- for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- if (!FilterLength) {
- // Cleanup.
- MMI->addCleanup(MBB);
- } else {
- // Filter.
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addFilterTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- N = i;
- }
- }
-
- if (N > 2) {
- TyInfo.reserve(N - 2);
- for (unsigned j = 2; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a65f33e..93699a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5d17a5f..61c0a6f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag"
//===----------------------------------------------------------------------===//
-/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
/// eliminating value sizes the machine cannot handle (promoting small sizes to
/// large sizes or splitting up large values into small values) as well as
@@ -86,7 +86,7 @@ private:
void LegalizeLoadOps(SDNode *Node);
void LegalizeStoreOps(SDNode *Node);
- /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+ /// Some targets cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
/// the insert there, and then read the result back.
@@ -95,7 +95,7 @@ private:
SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
SDValue Idx, SDLoc dl);
- /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
@@ -200,7 +200,7 @@ public:
};
}
-/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
@@ -232,7 +232,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
SDValue
SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
@@ -260,7 +260,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
// smaller type.
- TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
@@ -286,7 +286,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
return Result;
}
-/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+/// Expands an unaligned store to 2 half-size stores.
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
const TargetLowering &TLI,
SelectionDAGLegalize *DAGLegalize) {
@@ -409,7 +409,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
-/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+/// Expands an unaligned load to 2 half-size loads.
static void
ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -561,8 +561,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
ChainResult = TF;
}
-/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
-/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
/// the insert there, and then read the result back.
SDValue SelectionDAGLegalize::
@@ -725,14 +725,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node),
- DAG, TLI, this);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -766,8 +765,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment,
- AAInfo);
+ NVT, isVolatile, isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -845,7 +843,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -946,7 +944,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// nice to have an effective generic way of getting these benefits...
// Until such a way is found, don't insist on promoting i1 here.
(SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
+ TargetLowering::Promote)) {
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
unsigned NewWidth = SrcVT.getStoreSizeInBits();
@@ -1058,7 +1057,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Ch;
} else {
bool isCustom = false;
- switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
+ switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
+ SrcVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
@@ -1080,36 +1080,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getDataLayout()->getABITypeAlignment(Ty);
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty);
if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Value, Chain);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
}
}
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) &&
- TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
- LD->getMemOperand());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
}
- Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Chain = Load.getValue(1);
- break;
}
assert(!SrcVT.isVector() &&
@@ -1133,8 +1132,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Result.getValueType(),
Result, DAG.getValueType(SrcVT));
else
- ValRes = DAG.getZeroExtendInReg(Result, dl,
- SrcVT.getScalarType());
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
Value = ValRes;
Chain = Result.getValue(1);
break;
@@ -1155,8 +1153,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
-/// LegalizeOp - Return a legal replacement for the given operation, with
-/// all legal operands.
+/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
@@ -1642,8 +1639,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
-/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target.
+/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
+/// target.
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
@@ -1757,7 +1754,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
return false;
}
-/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
/// The resultant code need not be legal.
@@ -1917,7 +1914,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
return true;
}
-/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// Expand a BUILD_VECTOR node on targets that don't
/// support the operation, but do support the resultant vector type.
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned NumElems = Node->getNumOperands();
@@ -2029,7 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
-// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
// and leave the Hi part unset.
@@ -2077,7 +2074,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// Generate a libcall taking the given operands as arguments
/// and returning a result of type RetVT.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps,
@@ -2108,7 +2105,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
return CallInfo.first;
}
-// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
@@ -2178,7 +2175,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
@@ -2194,8 +2191,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
return TLI.getLibcallName(LC) != nullptr;
}
-/// useDivRem - Only issue divrem libcall if both quotient and remainder are
-/// needed.
+/// Only issue divrem libcall if both quotient and remainder are needed.
static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
// The other use might have been replaced with a divrem already.
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
@@ -2220,8 +2216,7 @@ static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
return false;
}
-/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
-/// pairs.
+/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
@@ -2283,7 +2278,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
-/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+/// Return true if sincos libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -2297,8 +2292,8 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
return TLI.getLibcallName(LC) != nullptr;
}
-/// canCombineSinCosLibcall - Return true if sincos libcall is available and
-/// can be used to combine sin and cos.
+/// Return true if sincos libcall is available and can be used to combine sin
+/// and cos.
static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
const TargetMachine &TM) {
if (!isSinCosLibcallAvailable(Node, TLI))
@@ -2311,8 +2306,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
return true;
}
-/// useSinCos - Only issue sincos libcall if both sin and cos are
-/// needed.
+/// Only issue sincos libcall if both sin and cos are needed.
static bool useSinCos(SDNode *Node) {
unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
? ISD::FCOS : ISD::FSIN;
@@ -2330,8 +2324,7 @@ static bool useSinCos(SDNode *Node) {
return false;
}
-/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
-/// pairs.
+/// Issue libcalls to sincos to compute sin / cos pairs.
void
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
@@ -2396,7 +2389,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
MachinePointerInfo(), false, false, false, 0));
}
-/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
@@ -2594,7 +2587,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
-/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// *INT_TO_FP operation of the specified operand when the target requests that
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
@@ -2636,7 +2629,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
dl, NewInTy, LegalOp));
}
-/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// This function is responsible for legalizing a
/// FP_TO_*INT operation of the specified operand when the target requests that
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
@@ -2680,8 +2673,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
-/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
-///
+/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT);
@@ -2727,8 +2719,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
}
}
-/// ExpandBitCount - Expand the specified bitcount instruction into operations.
-///
+/// Expand the specified bitcount instruction into operations.
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
SDLoc dl) {
switch (Opc) {
@@ -3528,6 +3519,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
@@ -3554,6 +3548,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FP_TO_FP16: {
+ if (!TM.Options.UseSoftFloat && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
+ break;
+ }
+ }
+
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
@@ -4319,8 +4328,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplaceNode(Node, Results.data());
}
-// SelectionDAG::Legalize - This is the entry point for the file.
-//
+/// This is the entry point for the file.
void SelectionDAG::Legalize() {
AssignTopologicalOrder();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4591e79..b596715 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl).first;
+ &Op, 1, Signed, dl).first;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b73bb0a..5507c70 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -66,6 +66,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
@@ -454,6 +455,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+
+ SDValue Mask = N->getMask();
+ EVT NewMaskVT = getSetCCResultType(NVT);
+ if (NewMaskVT != N->getMask().getValueType())
+ Mask = PromoteTargetBoolean(Mask, NewMaskVT);
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMemOperand(), ISD::SEXTLOAD);
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
@@ -825,6 +844,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1091,6 +1114,64 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
+
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ SDValue DataOp = N->getValue();
+ EVT DataVT = DataOp.getValueType();
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDLoc dl(N);
+
+ bool TruncateStore = false;
+ if (!TLI.isTypeLegal(DataVT)) {
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
+ }
+ else {
+ assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
+ "Unexpected data legalization in MSTORE");
+ DataOp = GetWidenedVector(DataOp);
+
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ EVT BoolVT = getSetCCResultType(DataOp.getValueType());
+
+ // We can't use ModifyToType() because we should fill the mask with
+ // zeroes
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ }
+ }
+ else
+ Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+ N->getMemoryVT(), N->getMemOperand(),
+ TruncateStore);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
@@ -2936,17 +3017,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- NewMask.push_back(SV->getMaskElt(i));
- }
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
- return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index bd7dacf..ebf6b28 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// The target didn't want to custom lower it after all.
return false;
+ // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
+ // provide the same kind of custom splitting behavior.
+ if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
+ // We've legalized a return type by splitting it. If there is a chain,
+ // replace that too.
+ SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
+ if (N->getNumValues() > 1)
+ ReplaceValueWith(SDValue(N, 1), Results[2]);
+ return true;
+ }
+
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 30f412b..cef3fc9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -240,6 +240,7 @@ private:
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SDIV(SDNode *N);
@@ -285,6 +286,8 @@ private:
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -578,6 +581,7 @@ private:
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -594,6 +598,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
@@ -627,6 +632,7 @@ private:
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
@@ -653,6 +659,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b5af7b7..03c2734 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -200,12 +200,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
- switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) {
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
+ LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ if (Lowered == Result)
+ return TranslateLegalizeResults(Op, Lowered);
Changed = true;
if (Lowered->getNumValues() != Op->getNumValues()) {
// This expanded to something other than the load. Assume the
@@ -231,9 +234,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
- case TargetLowering::Custom:
- Changed = true;
- return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Custom: {
+ SDValue Lowered = TLI.LowerOperation(Result, DAG);
+ Changed = Lowered != Result;
+ return TranslateLegalizeResults(Op, Lowered);
+ }
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
@@ -389,7 +394,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
if (Op.getOperand(j)
.getValueType()
.getVectorElementType()
- .isFloatingPoint())
+ .isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
else
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
@@ -398,8 +404,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
- if (VT.isFloatingPoint() ||
- (VT.isVector() && VT.getVectorElementType().isFloatingPoint()))
+ if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
else
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
@@ -509,7 +516,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment(),
+ LD->isInvariant(),
+ MinAlign(LD->getAlignment(), Offset),
LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
@@ -521,7 +529,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
@@ -553,9 +562,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
WideIdx++;
- Offset -= WideBits;
- if (Offset > 0) {
- ShAmt = DAG.getConstant(SrcEltBits - Offset,
+ BitOffset -= WideBits;
+ if (BitOffset > 0) {
+ ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
TLI.getShiftAmountTy(WideVT));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
@@ -592,7 +601,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
@@ -651,7 +660,8 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
+ AAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 27f63d2..63671f7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -597,6 +597,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
+ case ISD::MLOAD:
+ SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+ break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
@@ -979,6 +982,67 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MLD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Ch = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ SDValue Mask = MLD->getMask();
+ unsigned Alignment = MLD->getOriginalAlignment();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
+
+ // If Alignment is equal to the vector size in bytes,
+ // use half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MLD->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0 = MLD->getSrc0();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ExtType);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ExtType);
+
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MLD, 1), Ch);
+
+}
+
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -1234,6 +1298,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
@@ -1395,6 +1462,58 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT, false, false, false, 0);
}
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ GetSplitVector(Data, DataLo, DataHi);
+ SDValue MaskLo, MaskHi;
+ GetSplitVector(Mask, MaskLo, MaskHi);
+
+ // If Alignment is equal to the vector size in bytes,
+ // use half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ N->isTruncatingStore());
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ N->isTruncatingStore());
+
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
@@ -1599,6 +1718,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
+ case ISD::MLOAD:
+ Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
case ISD::ADD:
case ISD::AND:
@@ -2289,6 +2411,44 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return Result;
}
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue Src0 = GetWidenedVector(N->getSrc0());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ EVT BoolVT = getSetCCResultType(WidenVT);
+
+ // We can't use ModifyToType() because we should fill the mask with
+ // zeroes
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+
+ SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, Src0, N->getMemoryVT(),
+ N->getMemOperand(), ExtType);
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
@@ -2434,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::ANY_EXTEND:
@@ -2632,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(StVal);
+ SDLoc dl(N);
+
+ if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ // The mask should be widened as well
+ EVT BoolVT = getSetCCResultType(WideVal.getValueType());
+ // We can't use ModifyToType() because we should fill the mask with
+ // zeroes
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ assert(Mask.getValueType().getVectorNumElements() ==
+ WideVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ false);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 8b9f618..3853ada 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
// Helper for AddGlue to clone node operands.
-static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
- SmallVectorImpl<EVT> &VTs,
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
SDValue ExtraOper = SDValue()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
- Ops.push_back(N->getOperand(I));
-
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
if (ExtraOper.getNode())
Ops.push_back(ExtraOper);
@@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
- SmallVector<EVT, 4> VTs;
SDNode *GlueDestNode = Glue.getNode();
// Don't add glue from a node to itself.
@@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
- for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
+ SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
if (AddGlue)
VTs.push_back(MVT::Glue);
@@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
!N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
"expected an unused glue value");
- SmallVector<EVT, 4> VTs;
- for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
- CloneNodeWithValues(N, DAG, VTs);
+ CloneNodeWithValues(N, DAG,
+ makeArrayRef(N->value_begin(), N->getNumValues() - 1));
}
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
@@ -551,6 +541,14 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
NodeNumDefs = 0;
return;
}
+ if (POpc == TargetOpcode::PATCHPOINT &&
+ Node->getValueType(0) == MVT::Other) {
+ // PATCHPOINT is defined to have one result, but it might really have none
+ // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+ // real definition.
+ NodeNumDefs = 0;
+ return;
+ }
unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
// Some instructions define regs that are not represented in the selection DAG
// (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7961e66..9466f4d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
-ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
- return ISD::ANY_EXTEND;
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
@@ -1484,6 +1484,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < (int)NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
@@ -1513,9 +1541,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return getUNDEF(VT);
// If Identity shuffle return that node.
- bool Identity = true;
+ bool Identity = true, AllSame = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
@@ -1537,18 +1566,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (Splat && Splat.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (V.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements())
+ if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+ EVT BuildVT = BV->getValueType(0);
+ SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
}
}
@@ -2323,6 +2369,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
+ case ISD::EXTRACT_ELEMENT: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ const unsigned Index =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+ // Remove low part of known bits mask
+ KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+ KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+ // Remove high part of known bit mask
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2522,6 +2583,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueType().getSizeInBits();
+ const int Items =
+ Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+ // Get the reverse index (starting from 0). The Op1 value indexes elements
+ // from the little end, while the sign bits start at the big end.
+ const int rIndex = Items - 1 -
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // If the sign portion ends in our element, the subtraction gives the correct
+ // result. Otherwise it gives a result that is either negative or > bitwidth.
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
}
// If we are looking at the loaded value of the SDNode.
@@ -2683,6 +2759,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(apf, VT);
}
case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2756,7 +2834,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstant(api, VT);
}
case ISD::BITCAST:
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
@@ -3379,8 +3459,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Perform trivial constant folding.
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
- if (SV.getNode()) return SV;
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()))
+ return SV;
// Canonicalize constant to RHS if commutative.
if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
@@ -3564,7 +3645,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
+ if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
return getConstantFP(V1, VT);
}
break;
@@ -3913,9 +3994,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize =
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4028,8 +4107,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4123,8 +4201,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4214,11 +4291,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
@@ -4280,10 +4359,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4332,10 +4413,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
- Size, Align, isVol, DstPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// Emit a library call.
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
@@ -4680,10 +4763,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
- "Cannot use trunc store to convert to or from a vector!");
+ "Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
- "Cannot use trunc store to change the number of vector elements!");
+ "Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
@@ -4917,6 +5000,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
return SDValue(N, 0);
}
+SDValue
+SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4, VTs,
+ ExtTy, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, bool isTrunc) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4,
+ VTs, isTrunc, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
@@ -6495,11 +6633,25 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
- return true;
-
+ // Handle X + C.
+ if (isBaseWithConstantOffset(Loc)) {
+ int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc) {
+ // If the base location is a simple address with no offset itself, then
+ // the second load's first add operand should be the base address.
+ if (LocOffset == Dist * (int)Bytes)
+ return true;
+ } else if (isBaseWithConstantOffset(BaseLoc)) {
+ // The base location itself has an offset, so subtract that value from the
+ // second load's offset before comparing to distance * size.
+ int64_t BOffset =
+ cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+ if ((LocOffset - BOffset) == Dist * (int)Bytes)
+ return true;
+ }
+ }
+ }
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8f582f1..097b618 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -16,9 +16,11 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
@@ -46,6 +48,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -54,7 +58,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -564,6 +567,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
+ assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
@@ -865,7 +869,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getSubtarget().getDataLayout();
+ DL = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -884,6 +888,7 @@ void SelectionDAGBuilder::clear() {
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
+ StatepointLowering.clear();
}
/// clearDanglingDebugInfo - Clear the dangling debug information
@@ -1234,24 +1239,29 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ const Function *F = I.getParent()->getParent();
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ LLVMContext &Context = F->getContext();
+ bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg);
- const Function *F = I.getParent()->getParent();
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
+ for (unsigned j = 0; j != NumValues; ++j) {
+ EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(Context, VT);
+ MVT PartVT = TLI.getRegisterType(Context, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -1259,8 +1269,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg))
+ if (RetInReg)
Flags.setInReg();
// Propagate extension type if any
@@ -1405,7 +1414,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
- Condition = ISD::SETEQ; // silence warning.
+ (void)Condition; // silence warning.
llvm_unreachable("Unknown compare instruction");
}
@@ -1947,7 +1956,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
Reg, VT);
SDValue Cmp;
- unsigned PopCount = CountPopulation_64(B.Mask);
+ unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
@@ -1959,7 +1968,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE);
+ DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT,
@@ -2062,10 +2071,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
- Ops[0] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
- getCurSDLoc(), ValueVTs[0]);
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, TLI.getPointerTy());
+ }
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
@@ -2077,6 +2090,27 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
+unsigned
+SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadBB) {
+ SDValue Chain = getControlRoot();
+
+ // Get the typeid that we will dispatch on later.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV);
+ SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel);
+
+ // Branch to the main landing pad block.
+ MachineBasicBlock *ClauseMBB = FuncInfo.MBB;
+ ClauseMBB->addSuccessor(LPadBB);
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain,
+ DAG.getBasicBlock(LPadBB)));
+ return VReg;
+}
+
/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -2363,17 +2397,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock* SwitchBB) {
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // Figure out which block is immediately after the current one.
- MachineFunction::iterator BBI = CR.CaseBB;
- ++BBI;
-
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
@@ -2395,6 +2420,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
DEBUG(dbgs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
@@ -2404,13 +2430,17 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
"Invalid case distance");
// Use volatile double here to avoid excess precision issues on some hosts,
// e.g. that use 80-bit X87 registers.
+ // Only consider the density of sub-ranges that actually have sufficient
+ // entries to be lowered as a jump table.
volatile double LDensity =
- (double)LSize.roundToDouble() /
- (LEnd - First + 1ULL).roundToDouble();
+ LSize.ult(TLI.getMinimumJumpTableEntries())
+ ? 0.0
+ : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble();
volatile double RDensity =
- (double)RSize.roundToDouble() /
- (Last - RBegin + 1ULL).roundToDouble();
- volatile double Metric = Range.logBase2()*(LDensity+RDensity);
+ RSize.ult(TLI.getMinimumJumpTableEntries())
+ ? 0.0
+ : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble();
+ volatile double Metric = Range.logBase2() * (LDensity + RDensity);
// Should always split in some non-trivial place
DEBUG(dbgs() <<"=>Step\n"
<< "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
@@ -2427,13 +2457,25 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
RSize -= J->size();
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (areJTsAllowed(TLI)) {
- // If our case is dense we *really* should handle it earlier!
- assert((FMetric > 0) && "Should handle dense range earlier!");
- } else {
+ if (FMetric == 0 || !areJTsAllowed(TLI))
Pivot = CR.Range.first + Size/2;
- }
+ splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB);
+ return true;
+}
+
+void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
@@ -2446,10 +2488,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// LHS's Case Value, and that Case Value is exactly one less than the
// Pivot's Value, then we can branch directly to the LHS's Target,
// rather than creating a leaf node for it.
- if ((LHSR.second - LHSR.first) == 1 &&
- LHSR.first->High == CR.GE &&
+ if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
cast<ConstantInt>(C)->getValue() ==
- (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
TrueBB = LHSR.first->BB;
} else {
TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2466,12 +2507,12 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// the current Case Value, rather than emitting a RHS leaf node for it.
if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
cast<ConstantInt>(RHSR.first->Low)->getValue() ==
- (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
FalseBB = RHSR.first->BB;
} else {
FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
CurMF->insert(BBI, FalseBB);
- WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+ WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
// Put SV in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(SV);
@@ -2486,8 +2527,6 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
-
- return true;
}
/// handleBitTestsSwitchCase - if current case range has few destination and
@@ -2514,15 +2553,14 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
return false;
size_t numCmps = 0;
- for (CaseItr I = CR.Range.first, E = CR.Range.second;
- I!=E; ++I) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
// Single case counts one, case range - two.
numCmps += (I->Low == I->High ? 1 : 2);
}
// Count unique destinations
SmallSet<MachineBasicBlock*, 4> Dests;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
Dests.insert(I->BB);
if (Dests.size() > 3)
// Don't bother the code below, if there are too much unique destinations
@@ -2629,9 +2667,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // Start with "simple" cases
- for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
- i != e; ++i) {
+ // Start with "simple" cases.
+ for (SwitchInst::ConstCaseIt i : SI.cases()) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
@@ -2694,32 +2731,58 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = nullptr;
+ if (SwitchMBB + 1 != FuncInfo.MF->end())
+ NextBlock = SwitchMBB + 1;
+
+
+ // Create a vector of Cases, sorted so that we can efficiently create a binary
+ // search tree from them.
+ CaseVector Cases;
+ Clusterify(Cases, SI);
+
+ // Get the default destination MBB.
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
- // If there is only the default destination, branch to it if it is not the
- // next basic block. Otherwise, just fall through.
- if (!SI.getNumCases()) {
- // Update machine-CFG edges.
+ if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
+ !Cases.empty()) {
+ // Replace an unreachable default destination with the most popular case
+ // destination.
+ DenseMap<const BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ const BasicBlock *MaxBB = nullptr;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ if (++Popularity[BB] > MaxPop) {
+ MaxPop = Popularity[BB];
+ MaxBB = BB;
+ }
+ }
- // If this is not a fall-through branch, emit the branch.
+ // Set new default.
+ assert(MaxPop > 0);
+ assert(MaxBB);
+ Default = FuncInfo.MBBMap[MaxBB];
+
+ // Remove cases that were pointing to the destination that is now the default.
+ Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
+ [&](const Case &C) { return C.BB == Default; }),
+ Cases.end());
+ }
+
+ // If there is only the default destination, go there directly.
+ if (Cases.empty()) {
+ // Update machine-CFG edges.
SwitchMBB->addSuccessor(Default);
- if (Default != NextBlock)
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
- MVT::Other, getControlRoot(),
- DAG.getBasicBlock(Default)));
+ // If this is not a fall-through branch, emit the branch.
+ if (Default != NextBlock) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Default)));
+ }
return;
}
- // If there are any non-default case statements, create a vector of Cases
- // representing each one, and sort the vector so that we can efficiently
- // create a binary search tree from them.
- CaseVector Cases;
- Clusterify(Cases, SI);
-
- // Get the Value to be switched on and default basic blocks, which will be
- // inserted into CaseBlock records, representing basic blocks in the binary
- // search tree.
+ // Get the Value to be switched on.
const Value *SV = SI.getCondition();
// Push the initial CaseRec onto the worklist
@@ -3613,6 +3676,74 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Value *PtrOperand = I.getArgOperand(1);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+ MMO, false);
+ DAG.setRoot(StoreNode);
+ setValue(&I, StoreNode);
+}
+
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Value *PtrOperand = I.getArgOperand(0);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SDValue InChain = DAG.getRoot();
+ if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(PtrOperand,
+ AA->getTypeStoreSize(I.getType()),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ InChain = DAG.getEntryNode();
+ }
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+ ISD::NON_EXTLOAD);
+ SDValue OutChain = Load.getValue(1);
+ DAG.setRoot(OutChain);
+ setValue(&I, Load);
+}
+
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -4460,11 +4591,10 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
+ if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
- CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
@@ -4623,7 +4753,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
- SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
return nullptr;
@@ -4632,7 +4763,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getValue(RegValue).getOperand(0);
- SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return nullptr;
@@ -4642,6 +4774,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4662,6 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4679,6 +4813,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4914,6 +5049,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return nullptr;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return nullptr;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5459,6 +5600,78 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
visitPatchpoint(&I);
return nullptr;
}
+ case Intrinsic::experimental_gc_statepoint: {
+ visitStatepoint(I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_result_int:
+ case Intrinsic::experimental_gc_result_float:
+ case Intrinsic::experimental_gc_result_ptr:
+ case Intrinsic::experimental_gc_result: {
+ visitGCResult(I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_relocate: {
+ visitGCRelocate(I);
+ return nullptr;
+ }
+ case Intrinsic::instrprof_increment:
+ llvm_unreachable("instrprof failed to lower an increment");
+
+ case Intrinsic::frameallocate: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Do the allocation and map it as a normal value.
+ // FIXME: Maybe we should add this to the alloca map so that we don't have
+ // to register allocate it?
+ uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size);
+ MVT PtrVT = TLI.getPointerTy(0);
+ SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT);
+ setValue(&I, FIVal);
+
+ // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is
+ // the same on all targets.
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::FRAME_ALLOC))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(Alloc);
+
+ return nullptr;
+ }
+
+ case Intrinsic::framerecover: {
+ // i8* @llvm.framerecover(i8* %fn, i8* %fp)
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT PtrVT = TLI.getPointerTy(0);
+
+ // Get the symbol that defines the frame offset.
+ Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
+
+ // Create a TargetExternalSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ StringRef Name = FrameAllocSym->getName();
+ assert(Name.size() == strlen(Name.data()) && "not null terminated");
+ SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ setValue(&I, Add);
+
+ return nullptr;
+ }
+ case Intrinsic::eh_begincatch:
+ case Intrinsic::eh_endcatch:
+ llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
}
}
@@ -5491,9 +5704,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
CLI.setChain(getRoot());
}
-
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
- std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
@@ -6191,9 +6403,10 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
+ OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
@@ -6289,8 +6502,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfoVector ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI.ParseConstraints(CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS);
bool hasMemory = false;
@@ -6382,12 +6595,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
@@ -6848,7 +7062,8 @@ std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
bool UseVoidTy,
- MachineBasicBlock *LandingPad) {
+ MachineBasicBlock *LandingPad,
+ bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6871,7 +7086,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
- .setDiscardResult(CS->use_empty());
+ .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
return lowerInvokable(CLI, LandingPad);
}
@@ -7003,7 +7218,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
std::pair<SDValue, SDValue> Result =
lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
- LandingPad);
+ LandingPad, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -7051,8 +7266,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
- for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
- Ops.push_back(*i);
+ Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this);
@@ -7251,11 +7465,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7304,6 +7515,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
+
+ if (NeedsRegBlock && Value == NumValues - 1)
+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
@@ -7460,7 +7674,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
@@ -7518,11 +7733,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7537,6 +7749,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
+ if (NeedsRegBlock && Value == NumValues - 1)
+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize();
}
}
@@ -7671,7 +7885,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
assert(i == InVals.size() && "Argument register count mismatch!");
// Finally, if the target has anything special to do, allow it to do so.
- // FIXME: this should insert code into the DAG!
EmitFunctionEntryCode();
}
@@ -7762,6 +7975,7 @@ MachineBasicBlock *
SelectionDAGBuilder::StackProtectorDescriptor::
AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
+ bool IsLikely,
MachineBasicBlock *SuccMBB) {
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
@@ -7771,6 +7985,7 @@ AddSuccessorMBB(const BasicBlock *BB,
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(SuccMBB);
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
return SuccMBB;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f74e652..ad7411f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,11 +14,13 @@
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
@@ -115,6 +117,10 @@ public:
/// get simple disambiguation between loads without worrying about alias
/// analysis.
SmallVector<SDValue, 8> PendingLoads;
+
+ /// State used while lowering a statepoint sequence (gc_statepoint,
+ /// gc_relocate, and gc_result). See StatepointLowering.h/.cpp for details.
+ StatepointLoweringState StatepointLowering;
private:
/// PendingExports - CopyToReg nodes that copy values to virtual registers
@@ -417,8 +423,8 @@ private:
assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
"already initialized!");
ParentMBB = MBB;
- SuccessMBB = AddSuccessorMBB(BB, MBB);
- FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+ SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
if (!Guard)
Guard = StackProtCheckCall.getArgOperand(0);
}
@@ -487,9 +493,10 @@ private:
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created.
+ /// block will be created. Assign a large weight if IsLikely is true.
MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
+ bool IsLikely,
MachineBasicBlock *SuccMBB = nullptr);
};
@@ -612,6 +619,13 @@ public:
N = NewN;
}
+ /// Drop whatever SDValue is currently recorded for \p V in NodeMap. Does
+ /// nothing when \p V has no entry.
+ void removeValue(const Value *V) {
+   // This exists only to support the hack in lowerCallFromStatepoint and
+   // should be removed once that hack is resolved.
+   // Erasing by key is already a no-op for a missing key, so a separate
+   // count() lookup beforehand is unnecessary.
+   NodeMap.erase(V);
+ }
+
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
assert(!N.getNode() && "Already set a value for this node!");
@@ -640,12 +654,15 @@ public:
unsigned NumArgs,
SDValue Callee,
bool UseVoidTy = false,
- MachineBasicBlock *LandingPad = nullptr);
+ MachineBasicBlock *LandingPad = nullptr,
+ bool IsPatchPoint = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+ // This function is responsible for the whole statepoint lowering process.
+ void LowerStatepoint(ImmutableStatepoint Statepoint);
private:
std::pair<SDValue, SDValue> lowerInvokable(
TargetLowering::CallLoweringInfo &CLI,
@@ -673,6 +690,8 @@ private:
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock *SwitchBB);
+ void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
+ const Value *SV, MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
@@ -699,6 +718,8 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
+ unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadMBB);
private:
// These all get lowered before this pass.
@@ -756,6 +777,8 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I);
+ void visitMaskedStore(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
void visitAtomicRMW(const AtomicRMWInst &I);
void visitFence(const FenceInst &I);
@@ -784,6 +807,11 @@ private:
void visitPatchpoint(ImmutableCallSite CS,
MachineBasicBlock *LandingPad = nullptr);
+ // These three are implemented in StatepointLowering.cpp
+ void visitStatepoint(const CallInst &I);
+ void visitGCRelocate(const CallInst &I);
+ void visitGCResult(const CallInst &I);
+
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c9f6cff..17eff94 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -187,6 +187,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv";
case ISD::FMA: return "fma";
+ case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
@@ -269,6 +270,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 79109b7..5e867cf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -19,10 +19,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -40,6 +42,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,7 +50,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -181,6 +183,10 @@ UseMBPI("use-mbpi",
cl::init(true), cl::Hidden);
#ifndef NDEBUG
+static cl::opt<std::string>
+FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
+ cl::desc("Only display the basic block whose name "
+ "matches this for all view-*-dags options"));
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the first "
@@ -345,7 +351,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
- initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -359,7 +366,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -372,7 +379,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Loop for blocks with phi nodes.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
PHINode *PN = dyn_cast<PHINode>(BB->begin());
@@ -396,8 +403,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
continue;
// Okay, we have to split this edge.
- SplitCriticalEdge(Pred->getTerminator(),
- GetSuccessorNumber(Pred, BB), SDISel, true);
+ SplitCriticalEdge(
+ Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
+ CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -429,12 +437,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
- LibInfo = &getAnalysis<TargetLibraryInfo>();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
@@ -650,6 +658,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
std::string BlockName;
int BlockNumber = -1;
(void)BlockNumber;
+ bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+ FilterDAGBasicBlockName ==
+ FuncInfo->MBB->getBasicBlock()->getName().str());
+#endif
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
@@ -663,7 +677,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+ if (ViewDAGCombine1 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine1 input for " + BlockName);
// Run the DAG combiner in pre-legalize mode.
{
@@ -676,8 +691,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
- if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
- BlockName);
+ if (ViewLegalizeTypesDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize-types input for " + BlockName);
bool Changed;
{
@@ -691,7 +706,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
- if (ViewDAGCombineLT)
+ if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
@@ -717,7 +732,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- if (ViewDAGCombineLT)
+ if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
@@ -731,7 +746,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
}
- if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+ if (ViewLegalizeDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize input for " + BlockName);
{
NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
@@ -741,7 +757,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+ if (ViewDAGCombine2 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
{
@@ -755,7 +772,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
- if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+ if (ViewISelDAGs && MatchFilterBB)
+ CurDAG->viewGraph("isel input for " + BlockName);
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
@@ -767,7 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
- if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+ if (ViewSchedDAGs && MatchFilterBB)
+ CurDAG->viewGraph("scheduler input for " + BlockName);
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
@@ -777,7 +796,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
- if (ViewSUnitDAGs) Scheduler->viewGraph();
+ if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
@@ -892,6 +911,8 @@ void SelectionDAGISel::DoInstructionSelection() {
void SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
+ const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -903,8 +924,73 @@ void SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
+ // If this is an MSVC-style personality function, we need to split the landing
+ // pad into several BBs.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
+ MF->getMMI().addPersonality(
+ MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts()));
+ if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) {
+ // Make virtual registers and a series of labels that fill in values for the
+ // clauses.
+ auto &RI = MF->getRegInfo();
+ FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC);
+
+ // Get all invoke BBs that will unwind into the clause BBs.
+ SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
+ MBB->pred_end());
+
+ // Emit separate machine basic blocks with separate labels for each clause
+ // before the main landing pad block.
+ MachineInstrBuilder SelectorPHI = BuildMI(
+ *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI),
+ FuncInfo->ExceptionSelectorVirtReg);
+ for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
+ // Skip filter clauses, we can't implement them yet.
+ if (LPadInst->isFilter(I))
+ continue;
+
+ MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
+ MF->insert(MBB, ClauseBB);
+
+ // Add the edge from the invoke to the clause.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->addSuccessor(ClauseBB);
+
+ // Mark the clause as a landing pad or MI passes will delete it.
+ ClauseBB->setIsLandingPad();
+
+ GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
+
+ // Start the BB with a label.
+ MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
+ BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
+ .addSym(ClauseLabel);
+
+ // Construct a simple BB that defines a register with the typeid constant.
+ FuncInfo->MBB = ClauseBB;
+ FuncInfo->InsertPt = ClauseBB->end();
+ unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Add the typeid virtual register to the phi in the main landing pad.
+ SelectorPHI.addReg(VReg).addMBB(ClauseBB);
+ }
+
+ // Remove the edge from the invoke to the lpad.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->removeSuccessor(MBB);
+
+ // Restore FuncInfo back to its previous state and select the main landing
+ // pad block.
+ FuncInfo->MBB = MBB;
+ FuncInfo->InsertPt = MBB->end();
+ return;
+ }
+
// Mark exception register as live in.
- const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 0000000..1271f6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,679 @@
+//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+// Statistics counters maintained while lowering statepoints.
+STATISTIC(NumSlotsAllocatedForStatepoints,
+          "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+          "Maximum number of stack slots required for a single statepoint");
+
+/// Prepare this state object for lowering a new statepoint: forget the
+/// recorded value locations, restart slot allocation from the first slot, and
+/// mark every known statepoint stack slot as free.
+void
+StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+  // Sanity check: nothing may be left over from the previous statepoint.
+  assert(PendingGCRelocateCalls.empty() &&
+         "Trying to visit statepoint before finished processing previous one");
+
+  NextSlotToAllocate = 0;
+  RelocLocations.clear();
+  Locations.clear();
+
+  // The allocation flags must be re-sized on every safepoint so that they
+  // stay in sync with FuncInfo.StatepointStackSlots; the points at which a
+  // SelectionDAGBuilder is cleared bear no relation to FunctionLoweringInfo.
+  const size_t NumKnownSlots = Builder.FuncInfo.StatepointStackSlots.size();
+  AllocatedStackSlots.resize(NumKnownSlots);
+  for (size_t Slot = 0; Slot < AllocatedStackSlots.size(); ++Slot)
+    AllocatedStackSlots[Slot] = false;
+}
+/// Discard all recorded locations and slot-allocation flags. No gc_relocate
+/// call may still be pending when this is invoked.
+void StatepointLoweringState::clear() {
+  AllocatedStackSlots.clear();
+  RelocLocations.clear();
+  Locations.clear();
+  assert(PendingGCRelocateCalls.empty() &&
+         "cleared before statepoint sequence completed");
+}
+
+/// Hand out a stack slot of type \p ValueType for the statepoint being
+/// lowered. A previously created slot that is not currently in use is
+/// preferred; if none is free, a new stack temporary is created and recorded
+/// in FuncInfo.StatepointStackSlots.
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+                                           SelectionDAGBuilder &Builder) {
+
+  NumSlotsAllocatedForStatepoints++;
+
+  // Walk forward from NextSlotToAllocate, skipping slots that are already
+  // taken (arbitrary slots may have been reserved up front). The iteration
+  // cap only exists to catch a runaway loop: if no slot is produced within
+  // 40000 iterations, something is seriously wrong.
+  for (int Attempt = 0; Attempt < 40000; Attempt++) {
+    assert(Builder.FuncInfo.StatepointStackSlots.size() ==
+               AllocatedStackSlots.size() &&
+           "broken invariant");
+    const size_t NumSlots = AllocatedStackSlots.size();
+    assert(NextSlotToAllocate <= NumSlots && "broken invariant");
+
+    if (NextSlotToAllocate < NumSlots) {
+      if (AllocatedStackSlots[NextSlotToAllocate]) {
+        // Note: the cursor is deliberately advanced only on this failing
+        // path. Advancing it on the succeeding path involves a bit of
+        // complexity that caused a minor bug previously. Unless performance
+        // shows this matters, please keep this code as simple as possible.
+        NextSlotToAllocate++;
+        continue;
+      }
+
+      // Existing slot that nobody has claimed yet: take it.
+      const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+      AllocatedStackSlots[NextSlotToAllocate] = true;
+      return Builder.DAG.getFrameIndex(FI, ValueType);
+    }
+
+    // Every existing slot has been examined; a brand new one is needed.
+    assert(NextSlotToAllocate == NumSlots);
+
+    // Record stats.
+    if (NumSlots + 1 > StatepointMaxSlotsRequired) {
+      StatepointMaxSlotsRequired = NumSlots + 1;
+    }
+
+    SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+    const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+    Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+    AllocatedStackSlots.push_back(true);
+    return SpillSlot;
+  }
+  llvm_unreachable("infinite loop?");
+}
+
+/// Look for a copy of \p Incoming that already lives in one of the stack
+/// slots used for statepoint spilling. If such a slot exists and is still
+/// free, mark it as allocated and record it as the location of the value so
+/// that this safepoint reuses it. This avoids series of loads and stores that
+/// only serve to reshuffle values on the stack between calls.
+static void reservePreviousStackSlotForValue(SDValue Incoming,
+                                             SelectionDAGBuilder &Builder) {
+
+  // Constants and frame indices are never spilled, so there is no previously
+  // allocated slot to look for.
+  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming))
+    return;
+
+  // A location is already recorded: the value is duplicated in the input.
+  if (Builder.StatepointLowering.getLocation(Incoming).getNode())
+    return;
+
+  // Search back for the "load from a stack slot" pattern to find the original
+  // slot allocated for this value. This could be extended to deal with simple
+  // modification patterns, but simply dealing with trivial load/store
+  // sequences helps a lot already.
+  auto *Load = dyn_cast<LoadSDNode>(Incoming);
+  if (!Load)
+    return;
+  auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr());
+  if (!FI)
+    return;
+
+  const int Index = FI->getIndex();
+  auto &Slots = Builder.FuncInfo.StatepointStackSlots;
+  auto Pos = std::find(Slots.begin(), Slots.end(), Index);
+  if (Pos == Slots.end()) {
+    // Not one of the lowering stack slots, can't reuse!
+    // TODO: Actually, we probably could reuse the stack slot if the value
+    // hasn't changed at all, but we'd need to look for intervening writes.
+    return;
+  }
+
+  // This is one of our dedicated lowering slots.
+  const int Offset = std::distance(Slots.begin(), Pos);
+  if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+    // Stack slot already assigned to someone else, can't use it!
+    // TODO: currently we reserve space for gc arguments after doing
+    // normal allocation for deopt arguments. We should reserve for
+    // _all_ deopt and gc arguments, then start allocating. This
+    // will prevent some moves being inserted when vm state changes,
+    // but gc state doesn't between two calls.
+    return;
+  }
+
+  // Reserve this stack slot.
+  Builder.StatepointLowering.reserveStackSlot(Offset);
+
+  // Cache this slot so we find it when going through the normal assignment
+  // loop.
+  Builder.StatepointLowering.setLocation(
+      Incoming,
+      Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()));
+
+  // TODO: handle case where a reloaded value flows through a phi to
+  // another safepoint. e.g.
+  // bb1:
+  //  a' = relocated...
+  // bb2: % pred: bb1, bb3, bb4, etc.
+  //  a_phi = phi(a', ...)
+  // statepoint ... a_phi
+  // NOTE: This will require reasoning about cross basic block values. This is
+  // decidedly non trivial and this might not be the right place to do it. We
+  // don't really have the information we need here...
+
+  // TODO: handle simple updates. If a value is modified and the original
+  // value is no longer live, it would be nice to put the modified value in the
+  // same slot. This allows folding of the memory accesses for some
+  // instruction types (like an increment).
+  // statepoint (i)
+  // i1 = i+1
+  // statepoint (i1)
+}
+
+/// Drop entries whose derived pointer lowers to an SDValue that was already
+/// seen earlier in \p Ptrs, removing the matching entries of \p Bases and
+/// \p Relocs alongside. This is not required for correctness. Its purpose is
+/// to reduce the size of the StackMap section. It has no effect on the number
+/// of spill slots required or the actual lowering.
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
+                                   SmallVectorImpl<const Value *> &Ptrs,
+                                   SmallVectorImpl<const Value *> &Relocs,
+                                   SelectionDAGBuilder &Builder) {
+
+  // This is horribly inefficient, but it is good enough for now.
+  SmallSet<SDValue, 64> Seen;
+
+  SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
+  for (size_t Idx = 0; Idx < Ptrs.size(); ++Idx) {
+    SDValue Lowered = Builder.getValue(Ptrs[Idx]);
+    // Skip anything whose lowered value has been kept already.
+    if (Seen.count(Lowered) != 0)
+      continue;
+    Seen.insert(Lowered);
+
+    NewBases.push_back(Bases[Idx]);
+    NewPtrs.push_back(Ptrs[Idx]);
+    NewRelocs.push_back(Relocs[Idx]);
+  }
+
+  // Filtering can only shrink the lists.
+  assert(Bases.size() >= NewBases.size());
+  assert(Ptrs.size() >= NewPtrs.size());
+  assert(Relocs.size() >= NewRelocs.size());
+
+  Bases = NewBases;
+  Ptrs = NewPtrs;
+  Relocs = NewRelocs;
+
+  // The three lists must stay parallel.
+  assert(Ptrs.size() == Bases.size());
+  assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract call from statepoint, lower it and return pointer to the
+/// call node. Also update NodeMap so that getValue(statepoint) will
+/// reference lowered call result
+///
+/// \param StatepointSite the statepoint wrapping the call to lower.
+/// \param Builder the SelectionDAGBuilder used to emit the call.
+/// \returns the node glued to the CALLSEQ_END that closes the emitted call
+/// sequence.
+static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+ SelectionDAGBuilder &Builder) {
+
+ ImmutableCallSite CS(StatepointSite.getCallSite());
+
+ // Lower the actual call itself - This is a bit of a hack, but we want to
+ // avoid modifying the actual lowering code. This is similar in intent to
+ // the LowerCallOperands mechanism used by PATCHPOINT, but is structured
+ // differently. Hopefully, this is slightly more robust w.r.t. calling
+ // convention, return values, and other function attributes.
+ Value *ActualCallee = const_cast<Value *>(StatepointSite.actualCallee());
+
+ // Gather the arguments of the wrapped call (not the statepoint's own).
+ std::vector<Value *> Args;
+ CallInst::const_op_iterator arg_begin = StatepointSite.call_args_begin();
+ CallInst::const_op_iterator arg_end = StatepointSite.call_args_end();
+ Args.insert(Args.end(), arg_begin, arg_end);
+ // TODO: remove the creation of a new instruction! We should not be
+ // modifying the IR (even temporarily) at this point.
+ CallInst *Tmp = CallInst::Create(ActualCallee, Args);
+ // Mirror the tail-call flag, calling convention and attributes of the
+ // statepoint call site onto the temporary call before lowering it.
+ Tmp->setTailCall(CS.isTailCall());
+ Tmp->setCallingConv(CS.getCallingConv());
+ Tmp->setAttributes(CS.getAttributes());
+ Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+
+ // Handle the return value of the call iff any.
+ const bool HasDef = !Tmp->getType()->isVoidTy();
+ if (HasDef) {
+ // The value of the statepoint itself will be the value of call itself.
+ // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ } else {
+ // The token value is never used from here on, just generate a poison value
+ Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
+ }
+ // Remove the fake entry we created so we don't have a hanging reference
+ // after we delete this node.
+ Builder.removeValue(Tmp);
+ delete Tmp;
+ Tmp = nullptr;
+
+ // Search for the call node
+ // The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ SDNode *CallNode = nullptr;
+
+ // We just emitted a call, so it should be last thing generated
+ SDValue Chain = Builder.DAG.getRoot();
+
+ // Find closest CALLSEQ_END walking back through lowered nodes if needed
+ SDNode *CallEnd = Chain.getNode();
+ // Sanity counts the glue hops taken; it is only checked by the assert below.
+ int Sanity = 0;
+ while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
+ CallEnd = CallEnd->getGluedNode();
+ assert(CallEnd && "Can not find call node");
+ assert(Sanity < 20 && "should have found call end already");
+ Sanity++;
+ }
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ assert(CallEnd->getGluedNode());
+
+ // Step back inside the CALLSEQ
+ CallNode = CallEnd->getGluedNode();
+ return CallNode;
+}
+
+/// Collect all gc pointers coming into the statepoint intrinsic, clean them
+/// up, and return three arrays:
+///   Bases - base pointers incoming to this statepoint
+///   Ptrs - derived pointers incoming to this statepoint
+///   Relocs - the gc_relocate corresponding to each base/ptr pair
+/// The arrays are in one-to-one correspondence with each other, i.e.
+/// Bases[i], Ptrs[i] and Relocs[i] all come from the same gcrelocate call.
+static void
+getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases,
+                              SmallVectorImpl<const Value *> &Ptrs,
+                              SmallVectorImpl<const Value *> &Relocs,
+                              ImmutableStatepoint StatepointSite,
+                              SelectionDAGBuilder &Builder) {
+  // One entry per gc_relocate hanging off this statepoint.
+  for (GCRelocateOperands Reloc :
+       StatepointSite.getRelocates(StatepointSite)) {
+    Bases.push_back(Reloc.basePtr());
+    Ptrs.push_back(Reloc.derivedPtr());
+    Relocs.push_back(Reloc.getUnderlyingCallSite().getInstruction());
+  }
+
+  // Several llvm::Values may map to the same SDValue; drop the redundant
+  // ones. This also removes plain duplicates from the original llvm::Value
+  // input list. It is a useful optimization for reducing the size of the
+  // StackMap section. It has no other impact.
+  removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
+
+  assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+}
+
+/// Spill a value incoming to the statepoint. The value may belong to either
+/// the vmstate or the gcstate; both are treated the same. If the value has
+/// not been given a location for the current statepoint yet, a stack slot is
+/// allocated and a store to it is emitted. Returns a pair whose first element
+/// is the (target) frame index holding the saved value and whose second
+/// element is the outgoing chain; the chain is the incoming one when no new
+/// store was needed.
+static std::pair<SDValue, SDValue>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+                             SelectionDAGBuilder &Builder) {
+  SDValue Slot = Builder.StatepointLowering.getLocation(Incoming);
+
+  // A location is already recorded for this value: reuse it, no new store.
+  if (Slot.getNode())
+    return std::make_pair(Slot, Chain);
+
+  // Otherwise grab a stack slot for it.
+  Slot = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+                                                      Builder);
+  assert(isa<FrameIndexSDNode>(Slot));
+  const int FrameIdx = cast<FrameIndexSDNode>(Slot)->getIndex();
+  // We use TargetFrameIndex so that isel will not select it into LEA
+  Slot = Builder.DAG.getTargetFrameIndex(FrameIdx, Incoming.getValueType());
+
+  // TODO: We can create TokenFactor node instead of
+  //       chaining stores one after another, this may allow
+  //       a bit more optimal scheduling for them
+  Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Slot,
+                               MachinePointerInfo::getFixedStack(FrameIdx),
+                               false, false, 0);
+
+  // Remember where the value lives so later requests reuse the slot.
+  Builder.StatepointLowering.setLocation(Incoming, Slot);
+
+  assert(Slot.getNode());
+  return std::make_pair(Slot, Chain);
+}
+
+/// Lower one value incoming to a statepoint node and append the resulting
+/// operand(s) to Ops. Deopt values and gc values are handled identically.
+/// Constants and frame indices are special cased; everything else is spilled
+/// to a stack slot. The DAG root is set to the resulting chain on exit.
+static void lowerIncomingStatepointValue(SDValue Incoming,
+                                         SmallVectorImpl<SDValue> &Ops,
+                                         SelectionDAGBuilder &Builder) {
+  SDValue CurChain = Builder.getRoot();
+
+  if (ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Incoming)) {
+    // If the original value was a constant, make sure it gets recorded as
+    // such in the stackmap.  This is required so that the consumer can
+    // parse any internal format to the deopt state.  It also handles null
+    // pointers and other constant pointers in GC states
+    SDValue Marker =
+        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64);
+    SDValue Payload =
+        Builder.DAG.getTargetConstant(ConstNode->getSExtValue(), MVT::i64);
+    Ops.push_back(Marker);
+    Ops.push_back(Payload);
+  } else if (FrameIndexSDNode *FrameNode =
+                 dyn_cast<FrameIndexSDNode>(Incoming)) {
+    // This handles allocas as arguments to the statepoint
+    const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+    SDValue TargetFI = Builder.DAG.getTargetFrameIndex(FrameNode->getIndex(),
+                                                       TLI.getPointerTy());
+    Ops.push_back(TargetFI);
+  } else {
+    // Otherwise, locate a spill slot and explicitly spill it so it
+    // can be found by the runtime later.  We currently do not support
+    // tracking values through callee saved registers to their eventual
+    // spill location.  This would be a useful optimization, but would
+    // need to be optional since it requires a lot of complexity on the
+    // runtime side which not all would support.
+    std::pair<SDValue, SDValue> Spilled =
+        spillIncomingStatepointValue(Incoming, CurChain, Builder);
+    Ops.push_back(Spilled.first);
+    CurChain = Spilled.second;
+  }
+
+  Builder.DAG.setRoot(CurChain);
+}
+
+/// Lower deopt state and gc pointer arguments of the statepoint.  The actual
+/// lowering is described in lowerIncomingStatepointValue.  This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible.  On
+/// completion, 'Ops' will contain ready to use operands for machine code
+/// statepoint. The chain nodes will have already been created and the DAG root
+/// will be set to the last value spilled (if any were).
+///
+/// The operands appended to 'Ops' are, in order: a constant holding the
+/// number of vm state arguments, the lowered vm state arguments, and then the
+/// gc arguments as (base, derived) pairs.
+static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+                                    ImmutableStatepoint StatepointSite,
+                                    SelectionDAGBuilder &Builder) {
+
+  // Lower the deopt and gc arguments for this statepoint.  Layout will
+  // be: deopt argument length, deopt arguments.., gc arguments...
+
+  SmallVector<const Value *, 64> Bases, Ptrs, Relocations;
+  getIncomingStatepointGCValues(Bases, Ptrs, Relocations,
+                                StatepointSite, Builder);
+
+#ifndef NDEBUG
+  // Check that each of the gc pointer and bases we've gotten out of the
+  // safepoint is something the strategy thinks might be a pointer into the GC
+  // heap.  This is basically just here to help catch errors during statepoint
+  // insertion. TODO: This should actually be in the Verifier, but we can't get
+  // to the GCStrategy from there (yet).
+  if (Builder.GFI) {
+    GCStrategy &S = Builder.GFI->getStrategy();
+    for (const Value *V : Bases) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() &&
+               "non gc managed base pointer found in statepoint");
+      }
+    }
+    for (const Value *V : Ptrs) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() &&
+               "non gc managed derived pointer found in statepoint");
+      }
+    }
+    for (const Value *V : Relocations) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() && "non gc managed pointer relocated");
+      }
+    }
+  }
+#endif
+
+  // Before we actually start lowering (and allocating spill slots for values),
+  // reserve any stack slots which we judge to be profitable to reuse for a
+  // particular value.  This is purely an optimization over the code below and
+  // doesn't change semantics at all.  It is important for performance that we
+  // reserve slots for both deopt and gc values before lowering either.
+  for (auto I = StatepointSite.vm_state_begin() + 1,
+            E = StatepointSite.vm_state_end();
+       I != E; ++I) {
+    const Value *V = *I;
+    SDValue Incoming = Builder.getValue(V);
+    reservePreviousStackSlotForValue(Incoming, Builder);
+  }
+  // Visit the gc values in the same order they are lowered below: each base
+  // pointer first, then its derived pointer.
+  for (unsigned i = 0; i < Bases.size(); ++i) {
+    reservePreviousStackSlotForValue(Builder.getValue(Bases[i]), Builder);
+    reservePreviousStackSlotForValue(Builder.getValue(Ptrs[i]), Builder);
+  }
+
+  // First, prefix the list with the number of unique values to be
+  // lowered.  Note that this is the number of *Values* not the
+  // number of SDValues required to lower them.
+  const int NumVMSArgs = StatepointSite.numTotalVMSArgs();
+  Ops.push_back(
+      Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+  Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64));
+
+  assert(NumVMSArgs + 1 == std::distance(StatepointSite.vm_state_begin(),
+                                         StatepointSite.vm_state_end()));
+
+  // The vm state arguments are lowered in an opaque manner.  We do
+  // not know what type of values are contained within.  We skip the
+  // first one since that happens to be the total number we lowered
+  // explicitly just above.  We could have left it in the loop and
+  // not done it explicitly, but it's far easier to understand this
+  // way.
+  for (auto I = StatepointSite.vm_state_begin() + 1,
+            E = StatepointSite.vm_state_end();
+       I != E; ++I) {
+    const Value *V = *I;
+    SDValue Incoming = Builder.getValue(V);
+    lowerIncomingStatepointValue(Incoming, Ops, Builder);
+  }
+
+  // Finally, go ahead and lower all the gc arguments.  There's no prefixed
+  // length for this one.  After lowering, we'll have the base and pointer
+  // arrays interwoven with each (lowered) base pointer immediately followed by
+  // its (lowered) derived pointer.  i.e
+  // (base[0], ptr[0], base[1], ptr[1], ...)
+  //
+  // Each pair is emitted explicitly rather than through an index-parity
+  // trick, so the base always precedes its derived pointer.
+  for (unsigned i = 0; i < Bases.size(); ++i) {
+    lowerIncomingStatepointValue(Builder.getValue(Bases[i]), Ops, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Ptrs[i]), Ops, Builder);
+  }
+}
+
+/// Entry point for a statepoint call: wraps CI in an ImmutableStatepoint and
+/// hands it to LowerStatepoint.
+void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
+  // Only statepoint calls may be routed here.
+  assert(isStatepoint(&CI) &&
+         "function called must be the statepoint function");
+
+  ImmutableStatepoint ISP(&CI);
+  LowerStatepoint(ISP);
+}
+
+/// Lower the statepoint ISP: spill its vm state and gc arguments, lower the
+/// wrapped call, and then replace the emitted call node with a STATEPOINT
+/// machine node that carries both the call operands and the lowered state.
+void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+  // The basic scheme here is that information about both the original call and
+  // the safepoint is encoded in the CallInst.  We create a temporary call and
+  // lower it, then reverse engineer the calling sequence.
+
+  NumOfStatepoints++;
+  // Clear state
+  StatepointLowering.startNewStatepoint(*this);
+
+  ImmutableCallSite CS(ISP.getCallSite());
+
+#ifndef NDEBUG
+  // Consistency check: record every gc_relocate that uses this statepoint so
+  // that visitGCRelocate can tick it off when it is reached.
+  for (const User *U : CS->users()) {
+    const CallInst *Call = cast<CallInst>(U);
+    if (isGCRelocate(Call))
+      StatepointLowering.scheduleRelocCall(*Call);
+  }
+
+  // If this is a malformed statepoint, report it early to simplify debugging.
+  // This should catch any IR level mistake that's made when constructing or
+  // transforming statepoints.
+  ISP.verify();
+
+  // Check that the associated GCStrategy expects to encounter statepoints.
+  // TODO: This if should become an assert.  For now, we allow the GCStrategy
+  // to be optional for backwards compatibility.  This will only last a short
+  // period (i.e. a couple of weeks).
+  if (GFI) {
+    assert(GFI->getStrategy().useStatepoints() &&
+           "GCStrategy does not expect to encounter statepoints");
+  }
+#endif
+
+  // Lower statepoint vmstate and gcstate arguments
+  SmallVector<SDValue, 10> LoweredArgs;
+  lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
+
+  // Get call node, we will replace it later with statepoint
+  SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+
+  // Construct the actual STATEPOINT node with all the appropriate arguments
+  // and return values.
+
+  // TODO: Currently, all of these operands are being marked as read/write in
+  // PrologEpilogInserter.cpp, we should special case the VMState arguments
+  // and flags to be read-only.
+  SmallVector<SDValue, 40> Ops;
+
+  // Calculate and push starting position of vmstate arguments
+  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+  SDValue Glue;
+  if (CallNode->getGluedNode()) {
+    // Glue is always last operand
+    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+  }
+  // Get number of arguments incoming directly into call node: everything but
+  // Chain, Target, RegMask and (when present) Glue.
+  unsigned NumCallRegArgs =
+      CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3);
+  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+  // Add call target
+  SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+  Ops.push_back(CallTarget);
+
+  // Add call arguments
+  // Get position of register mask in the call
+  SDNode::op_iterator RegMaskIt;
+  if (Glue.getNode())
+    RegMaskIt = CallNode->op_end() - 2;
+  else
+    RegMaskIt = CallNode->op_end() - 1;
+  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+  // Add a leading constant argument with the Flags and the calling convention
+  // masked together
+  CallingConv::ID CallConv = CS.getCallingConv();
+  // The result is dereferenced unconditionally, so use cast<> (which asserts
+  // on a type mismatch) instead of dyn_cast<> (which would hand back null).
+  int Flags = cast<ConstantInt>(CS.getArgument(2))->getZExtValue();
+  assert(Flags == 0 && "not expected to be used");
+  Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+  Ops.push_back(
+      DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64));
+
+  // Insert all vmstate and gcstate arguments
+  Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end());
+
+  // Add register mask from call node
+  Ops.push_back(*RegMaskIt);
+
+  // Add chain
+  Ops.push_back(CallNode->getOperand(0));
+
+  // Same for the glue, but we add it only if original call had it
+  if (Glue.getNode())
+    Ops.push_back(Glue);
+
+  // Compute return values.  Provide a glue output since we consume one as
+  // input.  This allows someone else to chain off us as needed.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT,
+                                                getCurSDLoc(), NodeTys, Ops);
+
+  // Replace original call
+  DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root
+  // Remove original call node
+  DAG.DeleteNode(CallNode);
+
+  // DON'T set the root - under the assumption that it's already set past the
+  // inserted node we created.
+
+  // TODO: A better future implementation would be to emit a single variable
+  // argument, variable return value STATEPOINT node here and then hookup the
+  // return value of each gc.relocate to the respective output of the
+  // previously emitted STATEPOINT value.  Unfortunately, this doesn't appear
+  // to actually be possible today.
+}
+
+/// Lower a gc_result call by forwarding the value already recorded for the
+/// statepoint it refers to.
+void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
+  // Operand 0 is the statepoint whose result this gc_result exposes.
+  Instruction *StatepointToken = cast<Instruction>(CI.getArgOperand(0));
+  assert(isStatepoint(StatepointToken) &&
+         "first argument must be a statepoint token");
+
+  // The result value of the gc_result is simply the result of the actual
+  // call.  We've already emitted this, so just grab the value.
+  SDValue CallResult = getValue(StatepointToken);
+  setValue(&CI, CallResult);
+}
+
+/// Lower a gc_relocate call. Constants and frame indices are passed through
+/// unchanged; any other derived pointer is reloaded from the stack slot it
+/// was spilled to, emitting the load only the first time it is requested.
+void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
+#ifndef NDEBUG
+  // Consistency check
+  StatepointLowering.relocCallVisited(CI);
+#endif
+
+  GCRelocateOperands Operands(&CI);
+  SDValue Derived = getValue(Operands.derivedPtr());
+
+  const bool WasNotSpilled =
+      isa<ConstantSDNode>(Derived) || isa<FrameIndexSDNode>(Derived);
+  if (WasNotSpilled) {
+    // We didn't need to spill these special cases (constants and allocas).
+    // See the handling in lowerIncomingStatepointValue for detail.
+    setValue(&CI, Derived);
+    return;
+  }
+
+  SDValue Reloaded = StatepointLowering.getRelocLocation(Derived);
+  if (Reloaded.getNode()) {
+    // A reload for this value was already emitted; reuse it.
+    setValue(&CI, Reloaded);
+    return;
+  }
+
+  // Emit new load since we did not emit it before
+  SDValue SpillSlot = StatepointLowering.getLocation(Derived);
+  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+
+  // Be conservative: flush all pending loads
+  // TODO: Probably we can be less restrictive on this,
+  // it may allow more scheduling opportunities
+  SDValue Chain = getRoot();
+
+  Reloaded = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain,
+                         SpillSlot, MachinePointerInfo::getFixedStack(FI),
+                         false, false, false, 0);
+
+  StatepointLowering.setRelocLocation(Derived, Reloaded);
+
+  // Again, be conservative, don't emit pending loads
+  DAG.setRoot(Reloaded.getValue(1));
+
+  assert(Reloaded.getNode());
+  setValue(&CI, Reloaded);
+}
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 0000000..673112c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,138 @@
+//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include <vector>
+
+namespace llvm {
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-selectiondag information.
+/// For each statepoint it tracks locations of its gc values (incoming and
+/// relocated) and list of gcreloc calls scheduled for visiting (this is
+/// used for a debug mode consistency check only).  The spill slot tracking
+/// works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+  StatepointLoweringState() : NextSlotToAllocate(0) {
+  }
+
+  /// Reset all state tracking for a newly encountered safepoint.  Also
+  /// performs some consistency checking.
+  void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+  /// Clear the memory usage of this object.  This is called from
+  /// SelectionDAGBuilder::clear.  We require this is never called in the
+  /// midst of processing a statepoint sequence.
+  void clear();
+
+  /// Returns the spill location of a value incoming to the current
+  /// statepoint.  Will return SDValue() if this value hasn't been
+  /// spilled.  Otherwise, the value has already been spilled and no
+  /// further action is required by the caller.
+  SDValue getLocation(SDValue val) {
+    // Single lookup: find() both tests for presence and yields the entry.
+    auto I = Locations.find(val);
+    if (I == Locations.end())
+      return SDValue();
+    return I->second;
+  }
+  /// Record the spill location of a value incoming to the current
+  /// statepoint.  Asserts if a location was already recorded for it.
+  void setLocation(SDValue val, SDValue Location) {
+    assert(!Locations.count(val) &&
+           "Trying to allocate already allocated location");
+    Locations[val] = Location;
+  }
+
+  /// Returns the relocated value for a given input pointer.  Will
+  /// return SDValue() if this value hasn't yet been reloaded from
+  /// its stack slot after the statepoint.  Otherwise, the value
+  /// has already been reloaded and the SDValue of that reload will
+  /// be returned.  Note that VMState values are spilled but not
+  /// reloaded (since they don't change at the safepoint unless
+  /// also listed in the GC pointer section) and will thus never
+  /// be in this map
+  SDValue getRelocLocation(SDValue val) {
+    // Single lookup: find() both tests for presence and yields the entry.
+    auto I = RelocLocations.find(val);
+    if (I == RelocLocations.end())
+      return SDValue();
+    return I->second;
+  }
+  /// Record the relocated value for a given input pointer.  Asserts if
+  /// one was already recorded for it.
+  void setRelocLocation(SDValue val, SDValue Location) {
+    assert(!RelocLocations.count(val) &&
+           "Trying to allocate already allocated location");
+    RelocLocations[val] = Location;
+  }
+
+  /// Record the fact that we expect to encounter a given gc_relocate
+  /// before the next statepoint.  If we don't see it, we'll report
+  /// an assertion.
+  void scheduleRelocCall(const CallInst &RelocCall) {
+    PendingGCRelocateCalls.push_back(&RelocCall);
+  }
+  /// Remove this gc_relocate from the list we're expecting to see
+  /// before the next statepoint.  If we weren't expecting to see
+  /// it, we'll report an assertion.
+  void relocCallVisited(const CallInst &RelocCall) {
+    SmallVectorImpl<const CallInst *>::iterator itr =
+        std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
+                  &RelocCall);
+    assert(itr != PendingGCRelocateCalls.end() &&
+           "Visited unexpected gcrelocate call");
+    PendingGCRelocateCalls.erase(itr);
+  }
+
+  // TODO: Should add consistency tracking to ensure we encounter
+  // expected gc_result calls too.
+
+  /// Get a stack slot we can use to store a value of type ValueType.  This
+  /// will hopefully be a recycled slot from another statepoint.
+  SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+  /// Mark the slot at index Offset as used by the current statepoint.
+  /// Asserts if the index is out of bounds, the slot is already reserved,
+  /// or the index lies below NextSlotToAllocate.
+  void reserveStackSlot(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    assert(!AllocatedStackSlots[Offset] && "already reserved!");
+    assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+    AllocatedStackSlots[Offset] = true;
+  }
+  /// Returns whether the slot at index Offset is marked as used by the
+  /// current statepoint.
+  bool isStackSlotAllocated(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    return AllocatedStackSlots[Offset];
+  }
+
+private:
+  /// Maps pre-relocation value (gc pointer directly incoming into statepoint)
+  /// into its location (currently only stack slots)
+  DenseMap<SDValue, SDValue> Locations;
+  /// Map pre-relocated value into its new relocated location
+  DenseMap<SDValue, SDValue> RelocLocations;
+
+  /// A boolean indicator for each slot listed in the FunctionInfo as to
+  /// whether it has been used in the current statepoint.  Since we try to
+  /// preserve stack slots across safepoints, there can be gaps in which
+  /// slots have been allocated.
+  SmallVector<bool, 50> AllocatedStackSlots;
+
+  /// Points just beyond the last slot known to have been allocated
+  unsigned NextSlotToAllocate;
+
+  /// Keep track of pending gcrelocate calls for consistency check
+  SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9aef5ed..0a3c926 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == DemandedMask) {
+ if (MsbMask == NewMask) {
unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
- // Compute the correct shift amount type, which must be getShiftAmountTy
- // for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
- if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp, ShiftAmt));
+ unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ bool AlreadySignExtended =
+ TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ // However if the input is already sign extended we expect the sign
+ // extension to be dropped altogether later and do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp,
+ ShiftAmt));
+ }
}
// Sign extension. Compute the demanded bits in the result that are not
@@ -1283,36 +1290,53 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// (zext x) == C --> x == (trunc C)
- if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // (sext x) == C --> x == (trunc C)
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ DCI.isBeforeLegalize() && N0->hasOneUse()) {
unsigned MinBits = N0.getValueSizeInBits();
- SDValue PreZExt;
+ SDValue PreExt;
+ bool Signed = false;
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
// ZExt
MinBits = N0->getOperand(0).getValueSizeInBits();
- PreZExt = N0->getOperand(0);
+ PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
- PreZExt = N0->getOperand(0);
+ PreExt = N0->getOperand(0);
}
+ } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
+ // SExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ Signed = true;
} else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
- // ZEXTLOAD
+ // ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
- PreZExt = N0;
+ PreExt = N0;
+ } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
+ Signed = true;
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
}
}
+ // Figure out how many bits we need to preserve this constant.
+ unsigned ReqdBits = Signed ?
+ C1.getBitWidth() - C1.getNumSignBits() + 1 :
+ C1.getActiveBits();
+
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
- MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) {
+ MinBits < C1.getBitWidth() &&
+ MinBits >= ReqdBits) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
@@ -2163,9 +2187,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ const std::string &Constraint,
+ MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2177,8 +2202,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2231,8 +2254,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
/// and also tie in the associated operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
-TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
- ImmutableCallSite CS) const {
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
+ ImmutableCallSite CS) const {
/// ConstraintOperands - Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
@@ -2323,7 +2347,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
// If we have multiple alternative constraints, select the best alternative.
- if (ConstraintOperands.size()) {
+ if (!ConstraintOperands.empty()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
@@ -2394,12 +2418,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {