author     Chris Lattner <sabre@nondot.org>  2008-04-10 05:37:47 +0000
committer  Chris Lattner <sabre@nondot.org>  2008-04-10 05:37:47 +0000
commit     302524611bfd6c0f02af5daa1df08ab203ea4002 (patch)
tree       a05bec357ce053d9355f3fc1b8f3bb57eff486a3 /lib/Target
parent     e6f143417485c270f8445032a4a775f96643d823 (diff)
move the x86-32 part of PR2108 here.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49465 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/README-SSE.txt  48
1 file changed, 48 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 4d72245..bbd93bf 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -428,6 +428,54 @@ entry:
 //===---------------------------------------------------------------------===//
 
+Consider (PR2108):
+
+#include <emmintrin.h>
+__m128i doload64(unsigned long long x) {return _mm_loadl_epi64((__m128i*)&x);}
+__m128i doload64_2(unsigned long long *x) {return _mm_loadl_epi64((__m128i*)x);}
+
+These are very similar routines, but we generate significantly worse code for
+the first one on x86-32:
+
+_doload64:
+        subl    $12, %esp
+        movl    20(%esp), %eax
+        movl    %eax, 4(%esp)
+        movl    16(%esp), %eax
+        movl    %eax, (%esp)
+        movsd   (%esp), %xmm0
+        addl    $12, %esp
+        ret
+_doload64_2:
+        movl    4(%esp), %eax
+        movsd   (%eax), %xmm0
+        ret
+
+The problem is that the argument lowering logic splits the i64 argument into
+2x i32 loads early, so the f64 insert doesn't match.  Here's a reduced testcase:
+
+define fastcc double @doload64(i64 %x) nounwind {
+entry:
+        %tmp717 = bitcast i64 %x to double              ; <double> [#uses=1]
+        ret double %tmp717
+}
+
+compiles to:
+
+_doload64:
+        subl    $12, %esp
+        movl    20(%esp), %eax
+        movl    %eax, 4(%esp)
+        movl    16(%esp), %eax
+        movl    %eax, (%esp)
+        movsd   (%esp), %xmm0
+        addl    $12, %esp
+        ret
+
+instead of a single movsd directly from the stack.
+
+//===---------------------------------------------------------------------===//
+
 __m128d test1( __m128d A, __m128d B) {
   return _mm_shuffle_pd(A, B, 0x3);
 }
 
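As a quick way to reproduce the comparison above, here is a minimal,
self-contained harness.  It is a sketch, not part of the commit: the file
name, the pointer casts, and the main() driver are assumptions added so the
two routines from the note can be built and checked.  Compile for x86-32
with SSE2 (e.g. cc -m32 -msse2 -O2 -S pr2108_harness.c) and compare the
assembly emitted for the two functions:

/* pr2108_harness.c: hypothetical harness for the PR2108 note above.
   The two function bodies mirror the README snippet; main() is an
   assumption added so the file links and the results can be checked. */
#include <emmintrin.h>  /* SSE2: _mm_loadl_epi64 / _mm_storel_epi64 */
#include <stdio.h>

__m128i doload64(unsigned long long x) {
  /* i64 passed by value: on x86-32 it arrives as two i32 stack slots,
     which is what currently blocks the single movsd load. */
  return _mm_loadl_epi64((const __m128i *)&x);
}

__m128i doload64_2(unsigned long long *x) {
  /* i64 passed by pointer: one movsd from the pointee suffices. */
  return _mm_loadl_epi64((const __m128i *)x);
}

int main(void) {
  unsigned long long v = 0x0123456789abcdefULL, a, b;
  _mm_storel_epi64((__m128i *)&a, doload64(v));   /* store low 64 bits */
  _mm_storel_epi64((__m128i *)&b, doload64_2(&v));
  printf("%llx %llx\n", a, b);  /* both should print 123456789abcdef */
  return a != b;                /* nonzero exit if the results differ */
}

Diffing the -S output of the two functions makes the extra stack traffic in
doload64 easy to spot, while the runtime check confirms that both routines
return the same bit pattern.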