aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp152
1 files changed, 113 insertions, 39 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 00b4976..dfd41b7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3872,11 +3872,13 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is512BitVector())
+ return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
+ unsigned NumElts = VT.getVectorNumElements();
if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3911,6 +3913,8 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is512BitVector())
+ return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -3948,6 +3952,8 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
bool Is256BitVec = VT.is256BitVector();
+ if (VT.is512BitVector())
+ return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -3988,6 +3994,9 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is512BitVector())
+ return false;
+
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -4093,6 +4102,44 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
return (FstHalf | (SndHalf << 4));
}
+// Symetric in-lane mask. Each lane has 4 elements (for imm8)
+static bool isPermImmMask(ArrayRef<int> Mask, EVT VT, unsigned& Imm8) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize < 32)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ Imm8 = 0;
+ if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ Imm8 |= Mask[i] << (i*2);
+ }
+ return true;
+ }
+
+ unsigned LaneSize = 4;
+ SmallVector<int, 4> MaskVal(LaneSize, -1);
+
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (Mask[i+l] < 0)
+ continue;
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Mask[i+l] - l;
+ Imm8 |= MaskVal[i] << (i*2);
+ continue;
+ }
+ if (Mask[i+l] != (signed)(MaskVal[i]+l))
+ return false;
+ }
+ }
+ return true;
+}
+
/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending whether theunderlying
@@ -4163,7 +4210,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i+1" is the value the indexed mask element must have
@@ -4186,7 +4234,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i" is the value the indexed mask element must have
@@ -4449,27 +4498,6 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
-/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
-/// Handles 256-bit.
-static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
-
- unsigned NumElts = VT.getVectorNumElements();
-
- assert((VT.is256BitVector() && NumElts == 4) &&
- "Unsupported vector type for VPERMQ/VPERMPD");
-
- unsigned Mask = 0;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt < 0)
- continue;
- Mask |= Elt << (i*2);
- }
-
- return Mask;
-}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -5288,7 +5316,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
- SmallVector<int, 8> Mask(NumElems, EltNo);
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumElems; ++i)
+ Mask.push_back(EltNo);
+
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
@@ -5720,7 +5751,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (VT == MVT::v4i32 || VT == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, dl);
@@ -7413,21 +7444,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
- SmallVector<SDValue, 8> permclMask;
- for (unsigned i = 0; i != 8; ++i) {
- permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
+ unsigned Imm8;
+ if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
+ return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
+
+ if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
+ VT.is512BitVector()) {
+ EVT MaskEltVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getVectorElementType().getSizeInBits());
+ EVT MaskVectorVT =
+ EVT::getVectorVT(*DAG.getContext(),MaskEltVT, NumElems);
+ SmallVector<SDValue, 16> permclMask;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
- &permclMask[0], 8);
- // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
- return DAG.getNode(X86ISD::VPERMV, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
- }
- if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
- return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
- getShuffleCLImmediate(SVOp), DAG);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
+ &permclMask[0], NumElems);
+ if (V2IsUndef)
+ // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
+ return DAG.getNode(X86ISD::VPERMV, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
+ return DAG.getNode(X86ISD::VPERMV3, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2);
+ }
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -10149,6 +10189,36 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op->getValueType(0);
+ SDValue In = Op->getOperand(0);
+ EVT InVT = In.getValueType();
+ SDLoc dl(Op);
+
+ if (InVT.getVectorElementType().getSizeInBits() >=8 &&
+ VT.getVectorElementType().getSizeInBits() >= 32)
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
+ if (InVT.getVectorElementType() == MVT::i1) {
+ unsigned int NumElts = InVT.getVectorNumElements();
+ assert ((NumElts == 8 || NumElts == 16) &&
+ "Unsupported SIGN_EXTEND operation");
+ if (VT.getVectorElementType().getSizeInBits() >= 32) {
+ Constant *C =
+ ConstantInt::get(*DAG.getContext(),
+ (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
+ }
+ }
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op->getValueType(0).getSimpleVT();
@@ -10156,6 +10226,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
MVT InVT = In.getValueType().getSimpleVT();
SDLoc dl(Op);
+ if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
+ return LowerSIGN_EXTEND_AVX512(Op, DAG);
+
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16))
return SDValue();
@@ -13239,6 +13312,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
+ case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";