From d5cbf3aa26d146b34c84663fcbd0d3161df70809 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 24 May 2008 00:10:13 +0000 Subject: This is done. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README-SSE.txt | 46 ------------------------------------------- 1 file changed, 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 58b4384..ad28248 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -382,52 +382,6 @@ elements are fixed zeros. //===---------------------------------------------------------------------===// -For this: - -#include <emmintrin.h> -void test(__m128d *r, __m128d *A, double B) { - *r = _mm_loadl_pd(*A, &B); -} - -We generates: - - subl $12, %esp - movsd 24(%esp), %xmm0 - movsd %xmm0, (%esp) - movl 20(%esp), %eax - movapd (%eax), %xmm0 - movlpd (%esp), %xmm0 - movl 16(%esp), %eax - movapd %xmm0, (%eax) - addl $12, %esp - ret - -icc generates: - - movl 4(%esp), %edx #3.6 - movl 8(%esp), %eax #3.6 - movapd (%eax), %xmm0 #4.22 - movlpd 12(%esp), %xmm0 #4.8 - movapd %xmm0, (%edx) #4.3 - ret #5.1 - -So icc is smart enough to know that B is in memory so it doesn't load it and -store it back to stack. - -This should be fixed by eliminating the llvm.x86.sse2.loadl.pd intrinsic, -lowering it to a load+insertelement instead. Already match the load+shuffle -as movlpd, so this should be easy. We already get optimal code for: - -define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) { -entry: - %tmp2 = load <2 x double>* %A, align 16 - %tmp8 = insertelement <2 x double> %tmp2, double %B, i32 0 - store <2 x double> %tmp8, <2 x double>* %r, align 16 - ret void -} - -//===---------------------------------------------------------------------===// - __m128d test1( __m128d A, __m128d B) { return _mm_shuffle_pd(A, B, 0x3); } -- cgit v1.1