From 3bde6fe0df05558b89e7edfe48ac05da59beb81a Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Tue, 23 Aug 2011 01:14:17 +0000
Subject: Introduce a pass to insert vzeroupper instructions to avoid AVX to
 SSE transition penalty.

The pass is enabled through the "x86-use-vzeroupper" llc command line
option. This is only the first step (a very naive and conservative one) to
sketch out the idea, but a proper DFA is coming next to allow smarter
decisions. Comments and ideas, now and in further commits, will be very
much appreciated.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138317 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/avx-vzeroupper.ll | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 test/CodeGen/X86/avx-vzeroupper.ll

(limited to 'test/CodeGen/X86/avx-vzeroupper.ll')

diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
new file mode 100644
index 0000000..eaf236c
--- /dev/null
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %add.i = fadd <4 x float> %a, %a
+  ret <4 x float> %add.i
+}
+
+; CHECK: _test00
+define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+  %add.i = fadd <4 x float> %a, %b
+  ; CHECK: vzeroupper
+  ; CHECK-NEXT: callq _do_sse
+  %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
+  %sub.i = fsub <4 x float> %call3, %add.i
+  ; CHECK-NOT: vzeroupper
+  ; CHECK: callq _do_sse_local
+  %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
+  ; CHECK: vzeroupper
+  ; CHECK-NEXT: jmp _do_sse
+  %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
+  ret <4 x float> %call10
+}
+
+declare <4 x float> @do_sse(<4 x float>)
--
cgit v1.1
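
The test above only uses 128-bit <4 x float> values; the penalty the pass is
after shows up when 256-bit AVX code (dirty upper ymm halves) calls into code
that may contain legacy SSE instructions. Below is a minimal sketch of that
scenario, assuming the same llc flags as the RUN line above; the function and
callee names @upper_half_dirty and @maybe_sse are made up for illustration and
are not part of this commit.

; Illustrative sketch only, not part of this commit.
; The 256-bit fadd leaves the upper 128 bits of ymm0 dirty; a conservative
; pass would insert vzeroupper before the call so that legacy SSE code in
; the callee does not pay the AVX<->SSE transition penalty.
define <4 x float> @upper_half_dirty(<8 x float> %a) nounwind {
entry:
  %wide = fadd <8 x float> %a, %a
  %low = shufflevector <8 x float> %wide, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; expected: a vzeroupper inserted before this call to an external function
  %r = tail call <4 x float> @maybe_sse(<4 x float> %low) nounwind
  ret <4 x float> %r
}

declare <4 x float> @maybe_sse(<4 x float>)

Feeding a snippet like this through the llc invocation from the RUN line and
inspecting the emitted assembly is enough to see where the pass places the
vzeroupper.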