author     Chris Lattner <sabre@nondot.org>  2008-04-10 05:37:47 +0000
committer  Chris Lattner <sabre@nondot.org>  2008-04-10 05:37:47 +0000
commit     302524611bfd6c0f02af5daa1df08ab203ea4002 (patch)
tree       a05bec357ce053d9355f3fc1b8f3bb57eff486a3 /lib/Target
parent     e6f143417485c270f8445032a4a775f96643d823 (diff)
move the x86-32 part of PR2108 here.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49465 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/README-SSE.txt  48
1 file changed, 48 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 4d72245..bbd93bf 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -428,6 +428,54 @@ entry:
 //===---------------------------------------------------------------------===//
 
+Consider (PR2108):
+
+#include <emmintrin.h>
+__m128i doload64(unsigned long long x) {return _mm_loadl_epi64((__m128i*)&x);}
+__m128i doload64_2(unsigned long long *x) {return _mm_loadl_epi64((__m128i*)x);}
+
+These are very similar routines, but we generate significantly worse code for
+the first one on x86-32:
+
+_doload64:
+        subl    $12, %esp
+        movl    20(%esp), %eax
+        movl    %eax, 4(%esp)
+        movl    16(%esp), %eax
+        movl    %eax, (%esp)
+        movsd   (%esp), %xmm0
+        addl    $12, %esp
+        ret
+_doload64_2:
+        movl    4(%esp), %eax
+        movsd   (%eax), %xmm0
+        ret
+
+The problem is that the argument lowering logic splits the i64 argument into
+2x i32 loads early, so the f64 insert doesn't match.  Here's a reduced testcase:
+
+define fastcc double @doload64(i64 %x) nounwind {
+entry:
+        %tmp717 = bitcast i64 %x to double              ; <double> [#uses=1]
+        ret double %tmp717
+}
+
+compiles to:
+
+_doload64:
+        subl    $12, %esp
+        movl    20(%esp), %eax
+        movl    %eax, 4(%esp)
+        movl    16(%esp), %eax
+        movl    %eax, (%esp)
+        movsd   (%esp), %xmm0
+        addl    $12, %esp
+        ret
+
+instead of a single movsd directly from the stack.
+
+//===---------------------------------------------------------------------===//
+
 __m128d test1( __m128d A, __m128d B) {
   return _mm_shuffle_pd(A, B, 0x3);
 }
 
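As a quick way to reproduce the comparison above, here is a minimal,
self-contained harness.  It is a sketch, not part of the commit: the file
name, the pointer casts, and the main() driver are assumptions added so the
two routines from the note can be built and checked.  Compile for x86-32
with SSE2 (e.g. cc -m32 -msse2 -O2 -S pr2108_harness.c) and compare the
assembly emitted for the two functions:

/* pr2108_harness.c: hypothetical harness for the PR2108 note above.
   The two function bodies mirror the README snippet; main() is an
   assumption added so the file links and the results can be checked. */
#include <emmintrin.h>  /* SSE2: _mm_loadl_epi64 / _mm_storel_epi64 */
#include <stdio.h>

__m128i doload64(unsigned long long x) {
  /* i64 passed by value: on x86-32 it arrives as two i32 stack slots,
     which is what currently blocks the single movsd load. */
  return _mm_loadl_epi64((const __m128i *)&x);
}

__m128i doload64_2(unsigned long long *x) {
  /* i64 passed by pointer: one movsd from the pointee suffices. */
  return _mm_loadl_epi64((const __m128i *)x);
}

int main(void) {
  unsigned long long v = 0x0123456789abcdefULL, a, b;
  _mm_storel_epi64((__m128i *)&a, doload64(v));   /* store low 64 bits */
  _mm_storel_epi64((__m128i *)&b, doload64_2(&v));
  printf("%llx %llx\n", a, b);  /* both should print 123456789abcdef */
  return a != b;                /* nonzero exit if the results differ */
}

Diffing the -S output of the two functions makes the extra stack traffic in
doload64 easy to spot, while the runtime check confirms that both routines
return the same bit pattern.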