diff options
author | David Greene <greened@obbligato.org> | 2011-03-02 17:23:43 +0000 |
---|---|---|
committer | David Greene <greened@obbligato.org> | 2011-03-02 17:23:43 +0000 |
commit | a20244d1bab2ae6dad1c82c16670d7eb1b3a9087 (patch) | |
tree | 08a97db15ba97dc695ecea19935051bc89e3937c /lib/Target/X86/Utils | |
parent | 31c488c8bdef56618e45a5356a7c6770d13e1241 (diff) | |
download | external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.zip external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.tar.gz external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.tar.bz2 |
[AVX] Fix mask predicates for 256-bit UNPCKLPS/D and implement
missing patterns for them.
Add a SIMD test subdirectory to hold tests for SIMD instruction
selection correctness and quality.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126845 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/Utils')
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 25 |
1 files changed, 19 insertions, 6 deletions
```diff
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index eeb83c1..cd06060 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -165,12 +165,25 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 /// datatypes and vector widths.
 void DecodeUNPCKLPMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask) {
-
-  unsigned NElts = VT.getVectorNumElements();
-
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i); // Reads from dest
-    ShuffleMask.push_back(i+NElts); // Reads from src
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Handle vector lengths > 128 bits. Define a "section" as a set of
+  // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+  // sections.
+  unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0 ) NumSections = 1; // Handle MMX
+  unsigned NumSectionElts = NumElts / NumSections;
+
+  unsigned Start = 0;
+  unsigned End = NumSectionElts / 2;
+  for (unsigned s = 0; s < NumSections; ++s) {
+    for (unsigned i = Start; i != End; ++i) {
+      ShuffleMask.push_back(i); // Reads from dest/src1
+      ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+    }
+    // Process the next 128 bits.
+    Start += NumSectionElts;
+    End += NumSectionElts;
   }
 }
 
```