diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 94 | ||||
-rw-r--r-- | test/CodeGen/X86/avx2-unpack.ll | 14 |
5 files changed, 44 insertions, 84 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4f7bf15..a5bfe1a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2852,6 +2852,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2863,6 +2864,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: case X86ISD::VPERMILPS: @@ -2939,6 +2941,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2950,6 +2953,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: return DAG.getNode(Opc, dl, VT, V1, V2); @@ -3569,7 +3573,7 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3619,7 +3623,7 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -4639,6 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -4654,6 +4659,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: @@ -6595,6 +6601,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; case MVT::v16i16: return X86ISD::VPUNPCKLWDY; + case MVT::v32i8: return X86ISD::VPUNPCKLBWY; default: llvm_unreachable("Unknown type for unpckl"); } @@ -6618,6 +6625,7 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; case MVT::v16i16: return X86ISD::VPUNPCKHWDY; + case MVT::v32i8: return X86ISD::VPUNPCKHBWY; default: llvm_unreachable("Unknown type for unpckh"); } @@ -11270,6 +11278,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::VPUNPCKLBWY: return "X86ISD::VPUNPCKLBWY"; case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY"; case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY"; case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY"; @@ -11277,6 +11286,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VPUNPCKHBWY: return "X86ISD::VPUNPCKHBWY"; case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY"; case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY"; case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY"; @@ -14867,6 +14877,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -14878,6 +14889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7bb4da6..36cb152 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -285,6 +285,7 @@ namespace llvm { PUNPCKLWD, PUNPCKLDQ, PUNPCKLQDQ, + VPUNPCKLBWY, VPUNPCKLWDY, VPUNPCKLDQY, VPUNPCKLQDQY, @@ -292,6 +293,7 @@ namespace llvm { PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPUNPCKHBWY, VPUNPCKHWDY, VPUNPCKHDQY, VPUNPCKHQDQY, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c4d311f..7e8bc04 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -144,6 +144,7 @@ def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; +def X86Punpcklbwy : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>; def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>; def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>; def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>; @@ -152,6 +153,7 @@ def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckhbwy : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>; def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>; def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>; def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cfb8c85..94bd825 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4204,19 +4204,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64, 0>, VEX_4V; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, bc_v16i8, 0>, VEX_4V; @@ -4224,99 +4213,40 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy, bc_v32i8>, VEX_4V; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy, bc_v16i16>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy, bc_v8i32>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdqy, + bc_v4i64>, VEX_4V; - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; - - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy, bc_v32i8>, VEX_4V; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy, bc_v16i16>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy, + bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>; defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>; } } // ExeDomain = SSEPackedInt diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll index 51c0f16..aa97308 100644 --- a/test/CodeGen/X86/avx2-unpack.ll +++ b/test/CodeGen/X86/avx2-unpack.ll @@ -41,3 +41,17 @@ entry: %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> ret <16 x i16> %shuffle.i } + +; CHECK: vpunpckhbw +define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> + ret <32 x i8> %shuffle.i +} + +; CHECK: vpunpcklbw +define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> + ret <32 x i8> %shuffle.i +} |