diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-21 01:55:47 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-21 01:55:47 +0000 |
commit | 65b74e1d00eef81b596b4c207fba069aa1eb8214 (patch) | |
tree | e4f8ca2a664f9674f641caee534c42d5d2053a01 /lib/Target/X86/Utils | |
parent | 9283b668a1d0b41b83292e7b3577ec34a4ed2810 (diff) | |
download | external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.zip external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.gz external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.bz2 |
Add support for 256-bit versions of VPERMIL instruction. This is a new
instruction introduced in AVX, which can operate on 128 and 256-bit vectors.
It considers a 256-bit vector as two independent 128-bit lanes. It can permute
any 32- or 64-bit elements inside a lane, and restricts the second lane to
have the same permutation as the first one. With the improved splat support
introduced earlier today, adding codegen for this instruction enables more
efficient 256-bit code:
Instead of:
vextractf128 $0, %ymm0, %xmm0
punpcklbw %xmm0, %xmm0
punpckhbw %xmm0, %xmm0
vinsertf128 $0, %xmm0, %ymm0, %ymm1
vinsertf128 $1, %xmm0, %ymm1, %ymm0
vextractf128 $1, %ymm0, %xmm1
shufps $1, %xmm1, %xmm1
movss %xmm1, 28(%rsp)
movss %xmm1, 24(%rsp)
movss %xmm1, 20(%rsp)
movss %xmm1, 16(%rsp)
vextractf128 $0, %ymm0, %xmm0
shufps $1, %xmm0, %xmm0
movss %xmm0, 12(%rsp)
movss %xmm0, 8(%rsp)
movss %xmm0, 4(%rsp)
movss %xmm0, (%rsp)
vmovaps (%rsp), %ymm0
We get:
vextractf128 $0, %ymm0, %xmm0
punpcklbw %xmm0, %xmm0
punpckhbw %xmm0, %xmm0
vinsertf128 $0, %xmm0, %ymm0, %ymm1
vinsertf128 $1, %xmm0, %ymm1, %ymm0
vpermilps $85, %ymm0, %ymm0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135662 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/Utils')
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.h | 14 |
2 files changed, 41 insertions, 0 deletions
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cd06060..c1ff0e5 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -187,4 +187,31 @@ void DecodeUNPCKLPMask(EVT VT,
   }
 }
 
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeVPERMILMask(MVT::getVectorVT(MVT::i32, NElts), Imm, ShuffleMask);
+}
+
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeVPERMILMask(MVT::getVectorVT(MVT::i64, NElts), Imm, ShuffleMask);
+}
+
+// DecodeVPERMILMask - Decodes VPERMIL permutes for any 128-bit
+// with 32/64-bit elements. For 256-bit vectors, it's considered
+// as two 128 lanes and the mask of the first lane should be
+// identical of the second one.
+void DecodeVPERMILMask(EVT VT, unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumLanes = VT.getSizeInBits()/128;
+
+  for (unsigned l = 0; l != NumLanes; ++l) {
+    for (unsigned i = 0; i != NumElts/NumLanes; ++i) {
+      unsigned Idx = (Imm >> (i*2)) & 0x3 ;
+      ShuffleMask.push_back(Idx+(l*NumElts/NumLanes));
+    }
+  }
+}
+
 } // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index b18f670..4a52140 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -82,6 +82,20 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 void DecodeUNPCKLPMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
+
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILMask - Decodes VPERMIL permutes for any 128-bit
+// with 32/64-bit elements. For 256-bit vectors, it's considered
+// as two 128 lanes and the mask of the first lane should be
+// identical of the second one.
+void DecodeVPERMILMask(EVT VT, unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
 } // llvm namespace
 
 #endif |