diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-11-21 06:57:39 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-11-21 06:57:39 +0000 |
commit | 6347e8662cea47065ed7092d75322076fc3e57f8 (patch) | |
tree | 5b16d10cded6c66e3cdb013ac1fc2bf1b575517d | |
parent | 62faf772781cf5c3bd6806a7887147e35713dd40 (diff) | |
download | external_llvm-6347e8662cea47065ed7092d75322076fc3e57f8.zip external_llvm-6347e8662cea47065ed7092d75322076fc3e57f8.tar.gz external_llvm-6347e8662cea47065ed7092d75322076fc3e57f8.tar.bz2 |
Add support for lowering 256-bit shuffles to VPUNPCKL/H for i16, i32, i64 if AVX2 is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145026 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 120 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 20 |
4 files changed, 115 insertions, 47 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4ba4b93..4f7bf15 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2851,6 +2851,9 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -2859,6 +2862,9 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: case X86ISD::VPERMILPS: case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: @@ -2932,6 +2938,9 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -2940,6 +2949,9 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -3550,13 +3562,14 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || NumElts != 16)) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3590,22 +3603,23 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || NumElts != 16)) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3637,10 +3651,10 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form @@ -4625,6 +4639,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: DecodePUNPCKHMask(NumElems, ShuffleMask); break; case X86ISD::UNPCKHPS: @@ -4637,6 +4654,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: DecodePUNPCKLMask(VT, ShuffleMask); break; case X86ISD::UNPCKLPS: @@ -6558,36 +6578,46 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; case MVT::v4f32: return X86ISD::UNPCKLPS; case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8i32: + if (HasAVX2) return X86ISD::VPUNPCKLDQY; + // else use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; - case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4i64: + if (HasAVX2) return X86ISD::VPUNPCKLQDQY; + // else use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; + case MVT::v16i16: return X86ISD::VPUNPCKLWDY; default: llvm_unreachable("Unknown type for unpckl"); } return 0; } -static inline unsigned getUNPCKHOpcode(EVT VT) { +static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKHDQ; case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8i32: + if (HasAVX2) return X86ISD::VPUNPCKHDQY; + // else use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKHPSY; - case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4i64: + if (HasAVX2) return X86ISD::VPUNPCKHQDQY; + // else use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; + case MVT::v16i16: return X86ISD::VPUNPCKHWDY; default: llvm_unreachable("Unknown type for unpckh"); } @@ -6688,6 +6718,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); @@ -6717,9 +6748,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isMOVDDUPMask(SVOp) && (Subtarget->hasSSE3() || Subtarget->hasAVX()) && @@ -6730,9 +6763,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2()) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6779,7 +6813,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + if (X86::isMOVLHPSMask(SVOp) && + !X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) @@ -6832,11 +6867,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); - if (X86::isUNPCKHMask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6845,9 +6882,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewMask = NormalizeMask(SVOp, DAG); ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { + if (X86::isUNPCKLMask(NSVOp, Subtarget->hasAVX2(), true)) { return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { + } else if (X86::isUNPCKHMask(NSVOp, Subtarget->hasAVX2(), true)) { return NewMask; } } @@ -6859,11 +6896,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); - if (X86::isUNPCKHMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKHMask(NewSVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); } // Normalize the node to match x86 shuffle ops if needed @@ -6904,9 +6943,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShuffleSHUFImmediate(SVOp), DAG); if (X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -11221,6 +11262,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; @@ -11228,10 +11270,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY"; + case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY"; + case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY"; case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY"; + case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY"; + case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; @@ -11356,8 +11404,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || - isUNPCKLMask(M, VT) || - isUNPCKHMask(M, VT) || + isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || + isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || isUNPCKL_v_undef_Mask(M, VT) || isUNPCKH_v_undef_Mask(M, VT)); } @@ -14819,6 +14867,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -14827,6 +14878,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: case X86ISD::VUNPCKLPSY: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5a40701..7bb4da6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -285,10 +285,16 @@ namespace llvm { PUNPCKLWD, PUNPCKLDQ, PUNPCKLQDQ, + VPUNPCKLWDY, + VPUNPCKLDQY, + VPUNPCKLQDQY, PUNPCKHBW, PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPUNPCKHWDY, + VPUNPCKHDQY, + VPUNPCKHQDQY, VPERMILPS, VPERMILPSY, VPERMILPD, @@ -414,11 +420,13 @@ namespace llvm { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c91e2df..c4d311f 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -144,11 +144,17 @@ def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; +def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>; +def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>; +def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>; def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>; +def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>; +def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>; def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; @@ -423,12 +429,12 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs), def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def unpckh : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def pshufd : PatFrag<(ops node:$lhs, node:$rhs), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 68010b7..cfb8c85 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4242,9 +4242,9 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy, bc_v8i32>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen @@ -4252,19 +4252,19 @@ let Predicates = [HasAVX2] in { def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; + [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, + VR256:$src2)))]>, VEX_4V; def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, (memopv4i64 addr:$src2))))]>, VEX_4V; defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy, bc_v16i16>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy, bc_v8i32>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen @@ -4272,12 +4272,12 @@ let Predicates = [HasAVX2] in { def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; + [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, + VR256:$src2)))]>, VEX_4V; def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, (memopv4i64 addr:$src2))))]>, VEX_4V; } |