aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp144
1 files changed, 115 insertions, 29 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f9b3f1a..81008e9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8856,6 +8856,37 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+/// Lower a vector ISD::ZERO_EXTEND node for AVX-512.
+///
+/// Only results with 8 or 16 elements are handled; for any other element
+/// count an empty SDValue is returned.
+///  - 512-bit result whose source elements are not i1: emitted as a single
+///    X86ISD::VZEXT node.
+///  - i1 (mask) source elements: the scalar constant 1 is loaded from the
+///    constant pool and passed, together with the mask, to
+///    X86ISD::VBROADCASTM at type v8i64 / v16i32. When the requested result
+///    is not 512 bits wide, that value is narrowed with X86ISD::VTRUNC.
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+                                       SelectionDAG &DAG) {
+  MVT VT = Op->getValueType(0).getSimpleVT();
+  SDValue In = Op->getOperand(0);
+  MVT InVT = In.getValueType().getSimpleVT();
+  SDLoc DL(Op);
+  unsigned int NumElts = VT.getVectorNumElements();
+  if (NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+  // Only mask (i1 element) extension remains past this point.
+  assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+  EVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+  // Cst was just created above and its node is dereferenced unconditionally,
+  // so use the checked cast<> rather than a dyn_cast<> whose null result
+  // would never be tested.
+  const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
+  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+                           MachinePointerInfo::getConstantPool(),
+                           false, false, false, Alignment);
+
+  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+  if (VT.is512BitVector())
+    return Brcst;
+  // Narrower result requested: truncate the 512-bit value down to VT.
+  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
@@ -8874,6 +8905,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
+ if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+ return LowerZERO_EXTEND_AVX512(Op, DAG);
+
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
@@ -8902,11 +8936,37 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getSimpleValueType();
-
- if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ MVT InVT = In.getSimpleValueType();
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+ "Invalid TRUNCATE operation");
+
+ if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+ if (VT.getVectorElementType().getSizeInBits() >=8)
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+ if (InVT.getSizeInBits() < 512) {
+ MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ }
+ SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+ SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+ return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+ }
+
+ if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
@@ -8937,7 +8997,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
- if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@@ -8995,11 +9055,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle truncation of V256 to V128 using shuffles.
- if (!VT.is128BitVector() || !SVT.is256BitVector())
+ if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
- assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
- "Invalid op");
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
@@ -10282,28 +10340,29 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
- if (InVT.getVectorElementType().getSizeInBits() >=8 &&
- VT.getVectorElementType().getSizeInBits() >= 32)
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
- if (InVT.getVectorElementType() == MVT::i1) {
- unsigned int NumElts = InVT.getVectorNumElements();
- assert ((NumElts == 8 || NumElts == 16) &&
- "Unsupported SIGN_EXTEND operation");
- if (VT.getVectorElementType().getSizeInBits() >= 32) {
- Constant *C =
- ConstantInt::get(*DAG.getContext(),
- (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
- }
- }
- return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+ MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+ Constant *C = ConstantInt::get(*DAG.getContext(),
+ APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@@ -11142,10 +11201,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256: {
+ case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -11153,12 +11216,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
@@ -13375,6 +13442,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
+ case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -16274,6 +16343,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
+ if (Subtarget->hasAVX512() && VT.isVector() &&
+ Cond.getValueType().getVectorElementType() == MVT::i1) {
+ // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+ // lowering on AVX-512. In this case we convert it to
+ // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
+ // The same situation for all 128 and 256-bit vectors of i8 and i16
+ EVT OpVT = LHS.getValueType();
+ if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+ DCI.AddToWorklist(Cond.getNode());
+ return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+ }
+ else
+ return SDValue();
+ }
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {