diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-23 01:14:17 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-23 01:14:17 +0000 |
commit | 3bde6fe0df05558b89e7edfe48ac05da59beb81a (patch) | |
tree | 011a10aa34d5fb2d2afa5786803bd3f240a9d2a7 /test/CodeGen/X86/avx-vzeroupper.ll | |
parent | 7e99b5c8a36e3e8d611e47122f9c596b58ccf3e8 (diff) | |
download | external_llvm-3bde6fe0df05558b89e7edfe48ac05da59beb81a.zip external_llvm-3bde6fe0df05558b89e7edfe48ac05da59beb81a.tar.gz external_llvm-3bde6fe0df05558b89e7edfe48ac05da59beb81a.tar.bz2 |
Introduce a pass to insert vzeroupper instructions to avoid AVX to
SSE transition penalty. The pass is enabled through the "x86-use-vzeroupper"
llc command line option. This is only the first step (very naive and
conservative one) to sketch out the idea, but proper DFA is coming next
to allow smarter decisions. Comments and ideas, both now and on further
commits, will be very much appreciated.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138317 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/avx-vzeroupper.ll')
-rw-r--r-- | test/CodeGen/X86/avx-vzeroupper.ll | 26 |
1 file changed, 26 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll new file mode 100644 index 0000000..eaf236c --- /dev/null +++ b/test/CodeGen/X86/avx-vzeroupper.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x float> %a, %a + ret <4 x float> %add.i +} + +; CHECK: _test00 +define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp { +entry: + %add.i = fadd <4 x float> %a, %b + ; CHECK: vzeroupper + ; CHECK-NEXT: callq _do_sse + %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind + %sub.i = fsub <4 x float> %call3, %add.i + ; CHECK-NOT: vzeroupper + ; CHECK: callq _do_sse_local + %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i) + ; CHECK: vzeroupper + ; CHECK-NEXT: jmp _do_sse + %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind + ret <4 x float> %call10 +} + +declare <4 x float> @do_sse(<4 x float>) |