diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-05-24 00:10:13 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-05-24 00:10:13 +0000 |
commit | d5cbf3aa26d146b34c84663fcbd0d3161df70809 (patch) | |
tree | cfb365ce16f6ce3b4e47b04ee2477632acab83a1 /lib/Target/X86/README-SSE.txt | |
parent | ab7c50b32b9a1faa3c21ea1ea684254bf8f9ec15 (diff) | |
download | external_llvm-d5cbf3aa26d146b34c84663fcbd0d3161df70809.zip external_llvm-d5cbf3aa26d146b34c84663fcbd0d3161df70809.tar.gz external_llvm-d5cbf3aa26d146b34c84663fcbd0d3161df70809.tar.bz2 |
This is done.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51526 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 46 |
1 files changed, 0 insertions, 46 deletions
```diff
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 58b4384..ad28248 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -382,52 +382,6 @@ elements are fixed zeros.
 
 //===---------------------------------------------------------------------===//
 
-For this:
-
-#include <emmintrin.h>
-void test(__m128d *r, __m128d *A, double B) {
-  *r = _mm_loadl_pd(*A, &B);
-}
-
-We generates:
-
-  subl $12, %esp
-  movsd 24(%esp), %xmm0
-  movsd %xmm0, (%esp)
-  movl 20(%esp), %eax
-  movapd (%eax), %xmm0
-  movlpd (%esp), %xmm0
-  movl 16(%esp), %eax
-  movapd %xmm0, (%eax)
-  addl $12, %esp
-  ret
-
-icc generates:
-
-  movl 4(%esp), %edx #3.6
-  movl 8(%esp), %eax #3.6
-  movapd (%eax), %xmm0 #4.22
-  movlpd 12(%esp), %xmm0 #4.8
-  movapd %xmm0, (%edx) #4.3
-  ret #5.1
-
-So icc is smart enough to know that B is in memory so it doesn't load it and
-store it back to stack.
-
-This should be fixed by eliminating the llvm.x86.sse2.loadl.pd intrinsic,
-lowering it to a load+insertelement instead. Already match the load+shuffle
-as movlpd, so this should be easy. We already get optimal code for:
-
-define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) {
-entry:
-  %tmp2 = load <2 x double>* %A, align 16
-  %tmp8 = insertelement <2 x double> %tmp2, double %B, i32 0
-  store <2 x double> %tmp8, <2 x double>* %r, align 16
-  ret void
-}
-
-//===---------------------------------------------------------------------===//
-
 __m128d test1( __m128d A, __m128d B) {
   return _mm_shuffle_pd(A, B, 0x3);
 }
```