diff options
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 21 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 91 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/dg.exp | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/notvunpcklpd.ll | 20 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/notvunpcklps.ll | 20 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/vunpcklpd.ll | 20 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/vunpcklps.ll | 20 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-256-unpack.ll | 58 |
11 files changed, 148 insertions, 132 deletions
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index c1ff0e5..2c8a9e4 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -167,23 +167,22 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; unsigned Start = 0; - unsigned End = NumSectionElts / 2; - for (unsigned s = 0; s < NumSections; ++s) { + unsigned End = NumLaneElts / 2; + for (unsigned s = 0; s < NumLanes; ++s) { for (unsigned i = Start; i != End; ++i) { ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2 + ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2 } // Process the next 128 bits. - Start += NumSectionElts; - End += NumSectionElts; + Start += NumLaneElts; + End += NumLaneElts; } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1c87f14..26c48a4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2711,6 +2711,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -2782,6 +2784,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -3219,20 +3223,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; unsigned Start = 0; - unsigned End = NumSectionElts; - for (unsigned s = 0; s < NumSections; ++s) { - for (unsigned i = Start, j = s * NumSectionElts; + unsigned End = NumLaneElts; + for (unsigned s = 0; s < NumLanes; ++s) { + for (unsigned i = Start, j = s * NumLaneElts; i != End; i += 2, ++j) { int BitI = Mask[i]; @@ -3248,8 +3254,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, } } // Process the next 128 bits. - Start += NumSectionElts; - End += NumSectionElts; + Start += NumLaneElts; + End += NumLaneElts; } return true; @@ -3266,21 +3272,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j + NumElts/2)) - return false; - if (V2IsSplat) { - if (isUndefOrEqual(BitI1, NumElts)) - return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; + + unsigned Start = 0; + unsigned End = NumLaneElts; + for (unsigned l = 0; l != NumLanes; ++l) { + for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2; + i != End; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) return false; + if (V2IsSplat) { + if (isUndefOrEqual(BitI1, NumElts)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j+NumElts)) + return false; + } } + // Process the next 128 bits. + Start += NumLaneElts; + End += NumLaneElts; } return true; } @@ -3299,16 +3322,14 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElems / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElems / NumLanes; - for (unsigned s = 0; s < NumSections; ++s) { - for (unsigned i = s * NumSectionElts, j = s * NumSectionElts; - i != NumSectionElts * (s + 1); + for (unsigned s = 0; s < NumLanes; ++s) { + for (unsigned i = s * NumLaneElts, j = s * NumLaneElts; + i != NumLaneElts * (s + 1); i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -4095,6 +4116,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: DecodeUNPCKHPMask(NumElems, ShuffleMask); break; case X86ISD::PUNPCKLBW: @@ -5751,6 +5774,8 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; default: @@ -12597,6 +12622,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0bb7933..5d6f36f 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -261,6 +261,8 @@ namespace llvm { VUNPCKLPDY, UNPCKHPS, UNPCKHPD, + VUNPCKHPSY, + VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 3045405..094f637 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -133,12 +133,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; +def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; + +def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; +def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; +def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; +def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a864175..59f1dc1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5677,6 +5677,12 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), (UNPCKHPSrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKHPSY +def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5703,6 +5709,12 @@ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (UNPCKHPDrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKHPDY +def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with MOVLHPS def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), diff --git a/test/CodeGen/X86/SIMD/dg.exp b/test/CodeGen/X86/SIMD/dg.exp deleted file mode 100644 index 629a147..0000000 --- a/test/CodeGen/X86/SIMD/dg.exp +++ /dev/null @@ -1,5 +0,0 @@ -load_lib llvm.exp - -if { [llvm_supports_target X86] } { - RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] -} diff --git a/test/CodeGen/X86/SIMD/notvunpcklpd.ll b/test/CodeGen/X86/SIMD/notvunpcklpd.ll deleted file mode 100644 index 3817ee7..0000000 --- a/test/CodeGen/X86/SIMD/notvunpcklpd.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -mattr=+avx | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) { -entry: - %incarray1 = alloca [2 x <4 x double>]*, align 8 - %incarrayb1 = alloca [2 x <4 x double>]*, align 8 - %carray = alloca [2 x <4 x double>], align 16 - %r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0 - %rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0 - %r3 = load <4 x double>* %r, align 8 - %r4 = load <4 x double>* %rb, align 8 - %r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x double>> [#uses=1] -; CHECK-NOT: vunpcklpd %ymm - %r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1 - store <4 x double> %r11, <4 x double>* %r12, align 4 - ret void -} diff --git a/test/CodeGen/X86/SIMD/notvunpcklps.ll b/test/CodeGen/X86/SIMD/notvunpcklps.ll deleted file mode 100644 index e3b115f..0000000 --- a/test/CodeGen/X86/SIMD/notvunpcklps.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -mattr=+avx | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) { -enmtry: - %incarray1 = alloca [2 x <8 x float>]*, align 8 - %incarrayb1 = alloca [2 x <8 x float>]*, align 8 - %carray = alloca [2 x <8 x float>], align 16 - %r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0 - %rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0 - %r3 = load <8 x float>* %r, align 8 - %r4 = load <8 x float>* %rb, align 8 - %r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x float>> [#uses=1] -; CHECK-NOT: vunpcklps %ymm - %r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0 - store <8 x float> %r8, <8 x float>* %r9, align 4 - ret void -} diff --git a/test/CodeGen/X86/SIMD/vunpcklpd.ll b/test/CodeGen/X86/SIMD/vunpcklpd.ll deleted file mode 100644 index 60d23a4..0000000 --- a/test/CodeGen/X86/SIMD/vunpcklpd.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -mattr=+avx | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) { -entry: - %incarray1 = alloca [2 x <4 x double>]*, align 8 - %incarrayb1 = alloca [2 x <4 x double>]*, align 8 - %carray = alloca [2 x <4 x double>], align 16 - %r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0 - %rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0 - %r3 = load <4 x double>* %r, align 8 - %r4 = load <4 x double>* %rb, align 8 - %r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 2, i32 6 > ; <<4 x double>> [#uses=1] -; CHECK: vunpcklpd - %r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1 - store <4 x double> %r11, <4 x double>* %r12, align 4 - ret void -} diff --git a/test/CodeGen/X86/SIMD/vunpcklps.ll b/test/CodeGen/X86/SIMD/vunpcklps.ll deleted file mode 100644 index a87b299..0000000 --- a/test/CodeGen/X86/SIMD/vunpcklps.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -mattr=+avx | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) { -entry: - %incarray1 = alloca [2 x <8 x float>]*, align 8 - %incarrayb1 = alloca [2 x <8 x float>]*, align 8 - %carray = alloca [2 x <8 x float>], align 16 - %r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0 - %rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0 - %r3 = load <8 x float>* %r, align 8 - %r4 = load <8 x float>* %rb, align 8 - %r11 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13 > ; <<8 x float>> [#uses=1] -; CHECK: vunpcklps - %r12 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 1 - store <8 x float> %r11, <8 x float>* %r12, align 4 - ret void -} diff --git a/test/CodeGen/X86/avx-256-unpack.ll b/test/CodeGen/X86/avx-256-unpack.ll new file mode 100644 index 0000000..4e906ee --- /dev/null +++ b/test/CodeGen/X86/avx-256-unpack.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vunpckhps +define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x double> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpcklps %ymm +define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpcklpd %ymm +define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpckhps %ymm +define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpckhpd %ymm +define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x double> %shuffle.i +} + |