Significantly simplify and improve handling of FP function results on x86-32.

This case returns the value in ST(0) and then has to convert it to an SSE
register. This causes significant codegen ugliness in some cases. For
example in the trivial fp-stack-direct-ret.ll testcase we used to generate:

_bar:
	subl	$28, %esp
	call	L_foo$stub
	fstpl	16(%esp)
	movsd	16(%esp), %xmm0
	movsd	%xmm0, 8(%esp)
	fldl	8(%esp)
	addl	$28, %esp
	ret

because we move the result of foo() into an XMM register, then have to
move it back for the return of bar.

Instead of hacking ever-more special cases into the call result lowering code
we take a much simpler approach: on x86-32, fp return is modeled as always
returning into an f80 register which is then truncated to f32 or f64 as needed.
Similarly for a result, we model it as an extension to f80 + return.

This exposes the truncate and extensions to the dag combiner, allowing target
independent code to hack on them, eliminating them in this case. This gives
us this code for the example above:

_bar:
	subl	$12, %esp
	call	L_foo$stub
	addl	$12, %esp
	ret

The nasty aspect of this is that these conversions are not legal, but we want
the second pass of dag combiner (post-legalize) to be able to hack on them.
To handle this, we lie to legalize and say they are legal, then custom expand
them on entry to the isel pass (PreprocessForFPConvert). This is gross, but
less gross than the code it is replacing :)

This also allows us to generate better code in several other cases. For
example on fp-stack-ret-conv.ll, we now generate:

_test:
	subl	$12, %esp
	call	L_foo$stub
	fstps	8(%esp)
	movl	16(%esp), %eax
	cvtss2sd	8(%esp), %xmm0
	movsd	%xmm0, (%eax)
	addl	$12, %esp
	ret

where before we produced (incidentally, the old bad code is identical to what
gcc produces):

_test:
	subl	$12, %esp
	call	L_foo$stub
	fstpl	(%esp)
	cvtsd2ss	(%esp), %xmm0
	cvtss2sd	%xmm0, %xmm0
	movl	16(%esp), %eax
	movsd	%xmm0, (%eax)
	addl	$12, %esp
	ret

Note that we generate slightly worse code on pr1505b.ll due to a scheduling
deficiency that is unrelated to this patch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2008-01-24 08:07:48 +0000
committer: Chris Lattner <sabre@nondot.org> 2008-01-24 08:07:48 +0000
commit: d43d00cf3ae44b94162552b19e4cf2491bc0533e (patch)
tree: 016abed6565ea71da4e31835aff169dc02e294f0 /test/CodeGen
parent: 125991a1f62db419fc4a504986a998d85fbef699 (diff)
download: external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.zip
external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.tar.gz
external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.tar.bz2
3 files changed, 29 insertions, 1 deletions
diff --git a/test/CodeGen/X86/fp-stack-direct-ret.ll b/test/CodeGen/X86/fp-stack-direct-ret.ll
new file mode 100644
index 0000000..78be2a3
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep fstp
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep movsd
+
+declare double @foo()
+
+define double @bar() {
+entry:
+	%tmp5 = tail call double @foo()
+	ret double %tmp5
+}
+
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
new file mode 100644
index 0000000..5254e1c
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -mcpu=yonah | grep cvtss2sd
+; RUN: llvm-as < %s | llc -mcpu=yonah | grep fstps
+; RUN: llvm-as < %s | llc -mcpu=yonah | not grep cvtsd2ss
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define void @test(double *%b) {
+entry:
+	%tmp13 = tail call double @foo()
+	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
+	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
+	volatile store double %tmp3940, double* %b
+	ret void
+}
+
+declare double @foo()
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index d5e3507..73cb23e 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 3
+; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
 ; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
 ; PR1505
author	Chris Lattner <sabre@nondot.org>	2008-01-24 08:07:48 +0000
committer	Chris Lattner <sabre@nondot.org>	2008-01-24 08:07:48 +0000
commit	d43d00cf3ae44b94162552b19e4cf2491bc0533e (patch)
tree	016abed6565ea71da4e31835aff169dc02e294f0 /test/CodeGen
parent	125991a1f62db419fc4a504986a998d85fbef699 (diff)
download	external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.zip external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.tar.gz external_llvm-d43d00cf3ae44b94162552b19e4cf2491bc0533e.tar.bz2