diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-12-06 08:21:25 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-12-06 08:21:25 +0000 |
commit | 34671b812a9855623fd9a02e5e2b2cb95a13ba2f (patch) | |
tree | ec56f5c735591559ba285146f8a95f32a94b0248 | |
parent | 18851edbc4666a8c8695b294e8bdfabbe157c086 (diff) | |
download | external_llvm-34671b812a9855623fd9a02e5e2b2cb95a13ba2f.zip external_llvm-34671b812a9855623fd9a02e5e2b2cb95a13ba2f.tar.gz external_llvm-34671b812a9855623fd9a02e5e2b2cb95a13ba2f.tar.bz2 |
Merge floating point and integer UNPCK X86ISD node types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145926 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 104 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 154 |
4 files changed, 104 insertions, 167 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 169e41b..fa43ac3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2851,10 +2851,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLP: - case X86ISD::PUNPCKL: - case X86ISD::UNPCKHP: - case X86ISD::PUNPCKH: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: case X86ISD::VPERMILP: case X86ISD::VPERM2X128: return true; @@ -2914,10 +2912,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVLPD: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLP: - case X86ISD::PUNPCKL: - case X86ISD::UNPCKHP: - case X86ISD::PUNPCKH: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -4460,12 +4456,10 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; - case X86ISD::PUNPCKH: - case X86ISD::UNPCKHP: + case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, ShuffleMask); break; - case X86ISD::PUNPCKL: - case X86ISD::UNPCKLP: + case X86ISD::UNPCKL: DecodeUNPCKLMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: @@ -6364,50 +6358,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v32i8: - case MVT::v16i8: - case MVT::v16i16: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: return X86ISD::PUNPCKL; - case MVT::v8i32: - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKL; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKLP; - default: - llvm_unreachable("Unknown type for unpckl"); - } - return 0; -} - -static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v32i8: - case MVT::v16i8: - case MVT::v16i16: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: return X86ISD::PUNPCKH; - case MVT::v4i64: - case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKH; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKHP; - default: - llvm_unreachable("Unknown type for unpckh"); - } - return 0; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6518,11 +6468,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) @@ -6534,8 +6482,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Use to match splats if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6635,12 +6582,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (isUNPCKLMask(M, VT, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG); if (isUNPCKHMask(M, VT, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6664,12 +6609,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG); } // Normalize the node to match x86 shuffle ops if needed @@ -6689,8 +6632,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64 || VT == MVT::v2i64) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); } if (isPSHUFHWMask(M, VT)) @@ -6708,11 +6650,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShuffleSHUFImmediate(SVOp), DAG); if (isUNPCKL_v_undef_Mask(M, VT)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); if (isUNPCKH_v_undef_Mask(M, VT)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -11023,10 +10963,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP"; - case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP"; - case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL"; - case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH"; + case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; + case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; @@ -14616,10 +14554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: case X86ISD::PALIGN: - case X86ISD::PUNPCKH: - case X86ISD::UNPCKHP: - case X86ISD::PUNPCKL: - case X86ISD::UNPCKLP: + case X86ISD::UNPCKH: + case X86ISD::UNPCKL: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dca172f..6b58f2a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -273,10 +273,8 @@ namespace llvm { MOVLPD, MOVSD, MOVSS, - UNPCKLP, - UNPCKHP, - PUNPCKL, - PUNPCKH, + UNPCKL, + UNPCKH, VPERMILP, VPERM2X128, VBROADCAST, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 343c5b7..fc97eb8 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -130,11 +130,8 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>; -def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>; - -def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>; -def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>; +def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; +def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 46993ef..2cae4b6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1157,11 +1157,11 @@ let Predicates = [HasAVX] in { (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSrm VR128:$src1, addr:$src2)>; - // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (VMOVHPDrm VR128:$src1, addr:$src2)>; @@ -1172,10 +1172,10 @@ let Predicates = [HasAVX] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPSmr addr:$dst, VR128:$src)>; def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; } @@ -1195,16 +1195,16 @@ let Predicates = [HasSSE1] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (MOVHPSmr addr:$dst, VR128:$src)>; } let Predicates = [HasSSE2] in { - // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; @@ -1215,7 +1215,7 @@ let Predicates = [HasSSE2] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; } @@ -2431,27 +2431,27 @@ let AddedComplexity = 10 in { } // AddedComplexity let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPSrr VR128:$src1, VR128:$src2)>; } let Predicates = [HasSSE2] in { - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -2464,59 +2464,43 @@ let Predicates = [HasSSE2] in { } let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -4199,66 +4183,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl, + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl, + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl, + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl, + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, bc_v2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh, + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh, + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh, + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh, + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, bc_v8i32>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl, + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, bc_v4i64>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, bc_v16i16>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh, + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl, + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl, + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl, + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, bc_v4i32>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl, + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, bc_v2i64>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh, + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh, + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh, + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, bc_v4i32>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh, + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, bc_v2i64>; } } // ExeDomain = SSEPackedInt +// Patterns for using AVX1 instructions with integer vectors +// Here to give AVX2 priority +let Predicates = [HasAVX] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + // Splat v2f64 / v2i64 let AddedComplexity = 10 in { def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), |