aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp529
1 files changed, 362 insertions, 167 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d2fd53..7c42e4d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -167,9 +167,18 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
- SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- ConstantSDNode *CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+ // FIXME: We blindly ignore splats which include undef which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
- SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
- // For an operand generating multiple values, one of the values may
- // become dead allowing further simplification (e.g. split index
- // arithmetic from an indexed load).
- if (Op->hasOneUse() || Op->getNumValues() > 1)
- AddToWorkList(Op);
- }
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
DAG.DeleteNode(TLO.Old.getNode());
}
}
@@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
- SDValue Ops[] = { N1, N0 };
- SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
- Ops);
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode;
+ if (const BinaryWithFlagsSDNode *BinNode =
+ dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ CSENode = DAG.getNodeIfExists(
+ N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ } else {
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+ }
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
- if (N0.getOpcode() == ISD::AND &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
- if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
if (C.getNode())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
@@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
if (VT.isInteger() &&
- (VT0 == MVT::i1 ||
- (VT0.isInteger() &&
- TLI.getBooleanContents(false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
+ (VT0 == MVT::i1 || (VT0.isInteger() &&
+ TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(false, true) &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
- // FIXME:
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
return std::make_pair(Lo, Hi);
}
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ MVT VT = N->getSimpleValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF
+ // After we find an UNDEF element, keep looping until we get to half the
+ // length of the BV and see if all the non-undef nodes are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N2;
+ // The ConvertSelectToConcatVector function is assuming both the above
+ // checks for (vselect (build_vector all{ones,zeros) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SDValue CV = ConvertSelectToConcatVector(N, DAG);
+ if (CV.getNode())
+ return CV;
+ }
+
return SDValue();
}
@@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
+ EVT N0VT = N0.getOperand(0).getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ TLI.getBooleanContents(N0VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
@@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+ TLI.hasBigEndianPartOrdering(VT) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
@@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- assert(AM != ISD::UNINDEXED);
- SDValue BP = LD->getOperand(1);
- SDValue Inc = LD->getOperand(2);
- unsigned Opc =
- (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
- return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- SDValue Index;
- if (N->hasAnyUseOfValue(1)) {
- Index = SplitIndexingFromLoad(LD);
- // Try to fold the base pointer arithmetic into subsequent loads and
- // stores.
- AddUsersToWorkList(N);
- } else
- Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
@@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return SDValue();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt =
+ cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorkList(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ if (TLI.isBigEndian())
+ PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+ Offset = DAG.getConstant(PtrOff, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getNode(
+ ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+ DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+ if (TLI.isBigEndian())
+ Offset = DAG.getNode(
+ ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+ DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ MPI = OriginalLoad->getPointerInfo();
+ }
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+ OriginalLoad->isNonTemporal(), Align,
+ OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+ OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(EVE);
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
@@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+ ISD::isNormalLoad(InVec.getNode())) {
+ SDValue Index = N->getOperand(1);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- bool NewLoad = false;
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
- // If the result of load has to be truncated, then it's not necessarily
- // profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
- return SDValue();
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
- return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
- BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
- ExtVT = BCVT.getVectorElementType();
- NewLoad = true;
- }
LoadSDNode *LN0 = nullptr;
const ShuffleVectorSDNode *SVN = nullptr;
@@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ EltNo = DAG.getConstant(Elt, EltNo.getValueType());
}
}
@@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (Elt == -1)
return DAG.getUNDEF(LVT);
- unsigned Align = LN0->getAlignment();
- if (NewLoad) {
- // Check the resultant load doesn't need a higher alignment than the
- // original load.
- unsigned NewAlign =
- TLI.getDataLayout()
- ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
- return SDValue();
-
- Align = NewAlign;
- }
-
- SDValue NewPtr = LN0->getBasePtr();
- unsigned PtrOff = 0;
-
- if (Elt) {
- PtrOff = LVT.getSizeInBits() * Elt / 8;
- EVT PtrType = NewPtr.getValueType();
- if (TLI.isBigEndian())
- PtrOff = VT.getSizeInBits() / 8 - PtrOff;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
- DAG.getConstant(PtrOff, PtrType));
- }
-
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
- SDValue Load;
- SDValue Chain;
- if (NVT.bitsGT(LVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
- ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
- NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- } else {
- Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- if (NVT.bitsLT(LVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
- else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
- }
- WorkListRemover DeadNodes(*this);
- SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Since we're explcitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(N);
- return SDValue(N, 0);
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
}
return SDValue();
@@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Opnds;
unsigned BuildVecNumElts = N0.getNumOperands();
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
+ EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+ EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+ if (SclTy0.isFloatingPoint()) {
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+ } else {
+ // If BUILD_VECTOR are from built from integer, they may have different
+ // operand types. Get the smaller type and truncate all operands to it.
+ EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N0.getOperand(i)));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N1.getOperand(i)));
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
@@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If this shuffle node is simply a swizzle of another shuffle node,
- // and it reverses the swizzle of the previous shuffle then we can
- // optimize shuffle(shuffle(x, undef), undef) -> x.
+ // then try to simplify it.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // Shuffle nodes can only reverse shuffles with a single non-undef value.
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
- return SDValue();
-
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle. Adopt the incoming index.
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
+ Mask.push_back(Idx);
+ }
+
+ bool CommuteOperands = false;
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+ // To be valid, the combine shuffle mask should only reference elements
+ // from one of the two vectors in input to the inner shufflevector.
+ bool IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // See if the combined mask only reference undefs or elements coming
+ // from the first shufflevector operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+ if (!IsValidMask) {
+ IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // Check that all the elements come from the second shuffle operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+ CommuteOperands = IsValidMask;
+ }
- // The combined shuffle must map each index to itself.
- if (Idx >= 0 && (unsigned)Idx != i)
+ // Early exit if the combined shuffle mask is not valid.
+ if (!IsValidMask)
return SDValue();
}
- return OtherSV->getOperand(0);
+ // See if this pair of shuffles can be safely folded according to either
+ // of the following rules:
+ // shuffle(shuffle(x, y), undef) -> x
+ // shuffle(shuffle(x, undef), undef) -> x
+ // shuffle(shuffle(x, y), undef) -> y
+ bool IsIdentityMask = true;
+ unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+ for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+ // Skip Undefs.
+ if (Mask[i] < 0)
+ continue;
+
+ // The combined shuffle must map each index to itself.
+ IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+ }
+
+ if (IsIdentityMask) {
+ if (CommuteOperands)
+ // optimize shuffle(shuffle(x, y), undef) -> y.
+ return OtherSV->getOperand(1);
+
+ // optimize shuffle(shuffle(x, undef), undef) -> x
+ // optimize shuffle(shuffle(x, y), undef) -> x
+ return OtherSV->getOperand(0);
+ }
+
+ // It may still be beneficial to combine the two shuffles if the
+ // resulting shuffle is legal.
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (!CommuteOperands)
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+ &Mask[0]);
+ }
}
return SDValue();
@@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
+ // Type legalization might introduce new shuffles in the DAG.
+ // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+ // -> (shuffle (VBinOp (A, B)), Undef, Mask).
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+ RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ EVT VT = N->getValueType(0);
+ SDValue UndefVector = LHS.getOperand(1);
+ SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ LHS.getOperand(0), RHS.getOperand(0));
+ AddUsersToWorkList(N);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+ &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.