Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 224
1 file changed, 188 insertions(+), 36 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7961e66..9466f4d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
-ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
- return ISD::ANY_EXTEND;
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
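With the new IsFP flag, a caller that expands an extending load can request FP_EXTEND when the result type is floating point. A minimal sketch of such a caller (LD, NewLoad and DAG are assumed names for illustration; they are not part of this change):

    // Expand an extending load into a plain load followed by an extend node.
    EVT VT = LD->getValueType(0);
    ISD::NodeType ExtOp =
        ISD::getExtForLoadExtType(VT.isFloatingPoint(), LD->getExtensionType());
    // For an EXTLOAD widened to f64 this now yields ISD::FP_EXTEND; previously
    // ANY_EXTEND was returned, which makes no sense for FP values.
    SDValue Extended = DAG.getNode(ExtOp, SDLoc(LD), VT, NewLoad);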
@@ -1484,6 +1484,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < (int)NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
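A worked illustration of the mask rewrite above (vector and mask invented for this note): let N1 be the splat BUILD_VECTOR <x, x, x, undef> and let the incoming mask be <2, 0, 1, 3>.

    // BlendSplat(N1BV, /*Offset=*/0) visits each mask entry:
    //   i = 0: referenced lane 2 defined, lane 0 defined -> MaskVec[0] = 0
    //   i = 1: referenced lane 0 defined, lane 1 defined -> MaskVec[1] = 1
    //   i = 2: referenced lane 1 defined, lane 2 defined -> MaskVec[2] = 2
    //   i = 3: referenced lane 3 undef                   -> MaskVec[3] = -1
    // The mask becomes <0, 1, 2, -1>, an identity mask, so the identity check
    // further down simply returns N1 and the shuffle disappears.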
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
@@ -1513,9 +1541,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return getUNDEF(VT);
// If Identity shuffle return that node.
- bool Identity = true;
+ bool Identity = true, AllSame = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
@@ -1537,18 +1566,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (Splat && Splat.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (V.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements())
+ if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+ EVT BuildVT = BV->getValueType(0);
+ SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
}
}
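The new AllSame path reads: if every mask entry selects the same lane of the BUILD_VECTOR feeding the shuffle, the shuffle is itself a splat of that one operand and can be rebuilt directly. A sketch of the effect (operand names invented for illustration):

    // V    = BUILD_VECTOR <a, b, c, d>   (reached through any bitcasts)
    // Mask = <2, 2, 2, 2>                (AllSame, element counts match)
    // Result: BUILD_VECTOR <c, c, c, c>, bitcast back to VT if the type of
    // the original BUILD_VECTOR differs from the shuffle's type.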
@@ -2323,6 +2369,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
+ case ISD::EXTRACT_ELEMENT: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ const unsigned Index =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+ // Remove low part of known bits mask
+ KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+ KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+ // Remove high part of known bits mask
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
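A worked example of the mask surgery above (values chosen for this note): extract the high i32 half of an i64 whose upper half is known to be zero.

    //   Op0   : i64 with KnownZero = 0xFFFFFFFF00000000 (high 32 bits zero)
    //   Op    : (extract_element Op0, 1)   -- index 1 is the high part
    //   Index = 1, BitWidth = 32
    //   getHiBits(64 - 1*32) shifts both masks right by 32 -> 0x00000000FFFFFFFF
    //   trunc(32) keeps the low 32 bits                    -> 0xFFFFFFFF
    // Every bit of the extracted element is therefore known zero, as expected.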
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2522,6 +2583,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueType().getSizeInBits();
+ const int Items =
+ Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+ // Get the reverse index (counted from the big end): Op1 indexes elements
+ // from the little end, but the sign bits sit at the big end.
+ const int rIndex = Items - 1 -
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // If the sign portion ends within our element, the subtraction gives the
+ // correct result. Otherwise it gives either a negative or a > BitWidth result.
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
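A quick check of the arithmetic (illustrative numbers): Op0 is an i64 with 40 known sign bits and Op extracts one i32 half, so BitWidth = 32 and Items = 2.

    //   High half (Op1 = 1): rIndex = 2 - 1 - 1 = 0
    //     clamp(40 - 0*32) = 32 -> the whole element is sign copies, since
    //     bits 63..32 of Op0 all equal the sign bit.
    //   Low half  (Op1 = 0): rIndex = 2 - 1 - 0 = 1
    //     clamp(40 - 1*32) = 8  -> only bits 31..24 of the low half are still
    //     copies of the sign bit.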
}
// If we are looking at the loaded value of the SDNode.
@@ -2683,6 +2759,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(apf, VT);
}
case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2756,7 +2834,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstant(api, VT);
}
case ISD::BITCAST:
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
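Together with the i16 -> f16 case above, these folds round-trip half-precision constants through bitcasts; for example (encoding chosen for this note):

    //   (bitcast (i16 0x3C00) to f16) -> f16 1.0   (0x3C00 is IEEE-754 half 1.0)
    //   (bitcast (f16 1.0) to i16)    -> i16 0x3C00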
@@ -3379,8 +3459,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Perform trivial constant folding.
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
- if (SV.getNode()) return SV;
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()))
+ return SV;
// Canonicalize constant to RHS if commutative.
if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
@@ -3564,7 +3645,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
+ if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
return getConstantFP(V1, VT);
}
break;
@@ -3913,9 +3994,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize =
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4028,8 +4107,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4123,8 +4201,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4214,11 +4291,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
@@ -4280,10 +4359,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4332,10 +4413,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
- Size, Align, isVol, DstPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// Emit a library call.
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
@@ -4680,10 +4763,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
- "Cannot use trunc store to convert to or from a vector!");
+ "Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
- "Cannot use trunc store to change the number of vector elements!");
+ "Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
@@ -4917,6 +5000,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
return SDValue(N, 0);
}
+SDValue
+SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4, VTs,
+ ExtTy, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, bool isTrunc) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Ops, 4,
+ VTs, isTrunc, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
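A sketch of how a caller (for instance, the lowering of @llvm.masked.load and @llvm.masked.store) might use the new builders; the MachineMemOperand setup and variable names here are assumptions for illustration, not part of this change:

    MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(PtrV), MachineMemOperand::MOLoad,
        VT.getStoreSize(), Alignment);
    SDValue Load = DAG.getMaskedLoad(VT, dl, Chain, Ptr, Mask, PassThru,
                                     VT, LoadMMO, ISD::NON_EXTLOAD);
    // Load.getValue(0) is the loaded data, Load.getValue(1) the output chain.

    MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(PtrV), MachineMemOperand::MOStore,
        VT.getStoreSize(), Alignment);
    SDValue Store = DAG.getMaskedStore(Load.getValue(1), dl, Val, Ptr, Mask,
                                       VT, StoreMMO, /*isTrunc=*/false);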
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
@@ -6495,11 +6633,25 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
- return true;
-
+ // Handle X + C.
+ if (isBaseWithConstantOffset(Loc)) {
+ int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc) {
+ // If the base location is a simple address with no offset itself, then
+ // the second load's first add operand should be the base address.
+ if (LocOffset == Dist * (int)Bytes)
+ return true;
+ } else if (isBaseWithConstantOffset(BaseLoc)) {
+ // The base location itself has an offset, so subtract that value from the
+ // second load's offset before comparing to distance * size.
+ int64_t BOffset =
+ cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+ if ((LocOffset - BOffset) == Dist * (int)Bytes)
+ return true;
+ }
+ }
+ }
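A short example of the new case (addresses invented for this note): both loads index off the same base pointer, each with its own constant offset.

    //   BaseLoc = (add %p, 8), Loc = (add %p, 16), Dist = 1, Bytes = 8
    //   Loc.getOperand(0) != BaseLoc, but both decompose as base + constant,
    //   and (16 - 8) == Dist * Bytes, so the loads are reported consecutive.
    // The removed code handled only Loc = (add BaseLoc, C), i.e. the case
    // where the base location itself carried no offset.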
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;