Add support to GVN for performing sret return slot optimization.

This means that, if an sret function tail calls another sret function, it should pass its own sret parameter to the tail callee, allowing it to fill in the correct return value. llvm-gcc does not emit this by default. Instead, it allocates space in the caller for the sret of the tail call and then uses memcpy to copy the result into the caller's sret parameter. This optimization detects and optimizes that case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47265 91177308-0d34-0410-b5e6-96231b3b80d8
author: Owen Anderson <resistor@mac.com> 2008-02-18 09:24:53 +0000
committer: Owen Anderson <resistor@mac.com> 2008-02-18 09:24:53 +0000
commit: 5aa4f2a0857563b4ad9115c614afed9501aa58f4 (patch)
tree: d1def04f316ef138fc6afe4cb5a91349cfe71054 /test
parent: 874a892c9969fbbe50a5017868c2f82923632b29 (diff)
download: external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.zip
external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.tar.gz
external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.tar.bz2
1 files changed, 28 insertions, 0 deletions
diff --git a/test/Transforms/GVN/sret.ll b/test/Transforms/GVN/sret.ll
new file mode 100644
index 0000000..9ae73ef
--- /dev/null
+++ b/test/Transforms/GVN/sret.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep memcpy | count 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  %z) nounwind  {		; test input: builds %iz from %z, calls @ccoshl into a temporary, then copies the temporary into %agg.result
+entry:
+	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3] -- argument struct passed byval to @ccoshl
+	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2] -- temporary passed as the sret slot of the @ccoshl call
+	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
+	%tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1] -- -0.0 - %tmp2, i.e. field 1 of %z negated
+	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
+	store x86_fp80 %tmp3, x86_fp80* %real, align 16		; %iz field 0 = -(field 1 of %z)
+	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16		; %iz field 1 = field 0 of %z
+	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval  %iz ) nounwind 		; %memtmp (not %agg.result) is handed to the callee as its sret slot
+	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
+	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )		; copies 32 bytes (align 16) from %memtmp to %agg.result; the RUN line expects this call to be gone after opt -gvn
+	ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind 
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
author	Owen Anderson <resistor@mac.com>	2008-02-18 09:24:53 +0000
committer	Owen Anderson <resistor@mac.com>	2008-02-18 09:24:53 +0000
commit	5aa4f2a0857563b4ad9115c614afed9501aa58f4 (patch)
tree	d1def04f316ef138fc6afe4cb5a91349cfe71054 /test
parent	874a892c9969fbbe50a5017868c2f82923632b29 (diff)
download	external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.zip external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.tar.gz external_llvm-5aa4f2a0857563b4ad9115c614afed9501aa58f4.tar.bz2