diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-09 22:18:37 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-09 22:18:37 +0000 |
commit | cde4a1abd5c9ee58898e633e3801da4f98468d66 (patch) | |
tree | f6af36a6faf94ea84f773d578493d098d645bd12 | |
parent | fc430a662ff3a3b9075558d32ebe00ffe1a23f35 (diff) | |
download | external_llvm-cde4a1abd5c9ee58898e633e3801da4f98468d66.zip external_llvm-cde4a1abd5c9ee58898e633e3801da4f98468d66.tar.gz external_llvm-cde4a1abd5c9ee58898e633e3801da4f98468d66.tar.bz2 |
Use fp unpack instructions to unpack int types. Until we have AVX2, this
is the best we can do for these patterns. This fix PR10554.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137161 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 38 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-unpack.ll | 31 |
3 files changed, 65 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 71b23e2..e9510af 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5940,7 +5940,9 @@ static inline unsigned getUNPCKLOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKLQDQ; case MVT::v4f32: return X86ISD::UNPCKLPS; case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; @@ -5956,7 +5958,9 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2b8180a..56b05e1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5799,18 +5799,24 @@ def : Pat<(X86Movddup (bc_v2f64 // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKHPSY +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKHPS def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5828,21 +5834,33 @@ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKLPDY +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + +def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKHPD def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5859,6 +5877,10 @@ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; // Shuffle with MOVLHPS def : Pat<(X86Movlhps VR128:$src1, diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll index 4e906ee..d420101 100644 --- a/test/CodeGen/X86/avx-unpack.ll +++ b/test/CodeGen/X86/avx-unpack.ll @@ -56,3 +56,34 @@ entry: ret <4 x double> %shuffle.i } +;;;; +;;;; Unpack versions using the fp unit for int unpacking +;;;; + +; CHECK: vunpckhps +define <8 x i32> @unpackhips1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i64> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i64> %shuffle.i +} |