diff options
author | Evan Cheng <evan.cheng@apple.com> | 2009-12-18 07:40:29 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2009-12-18 07:40:29 +0000 |
commit | 400073d5467b79534d8c63b0d996a55e4252ff4b (patch) | |
tree | f7204e84da8877e7b062f05bcb1878a05108b44e /test/CodeGen | |
parent | 3a5d409f3c2eccf1d1f0a4616023760829a4db67 (diff) | |
download | external_llvm-400073d5467b79534d8c63b0d996a55e4252ff4b.zip external_llvm-400073d5467b79534d8c63b0d996a55e4252ff4b.tar.gz external_llvm-400073d5467b79534d8c63b0d996a55e4252ff4b.tar.bz2 |
On recent Intel u-arch's, folding loads into some unary SSE instructions can
be non-optimal. To be precise, we should avoid folding loads if the instructions
only update part of the destination register, and the non-updated part is not
needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks
the partial register dependency and it can improve performance. e.g.
movss (%rdi), %xmm0
cvtss2sd %xmm0, %xmm0
instead of
cvtss2sd (%rdi), %xmm0
An alternative method to break dependency is to clear the register first. e.g.
xorps %xmm0, %xmm0
cvtss2sd (%rdi), %xmm0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91672 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/break-sse-dep.ll | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
new file mode 100644
index 0000000..00c943f
--- /dev/null
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
+
+define double @t1(float* nocapture %x) nounwind readonly ssp {
+entry:
+; With +break-sse-dep the load is unfolded to break the partial-register
+; dependency: movss then a register-register cvtss2sd.
+; YES: t1:
+; YES: movss (%rdi), %xmm0
+; YES: cvtss2sd %xmm0, %xmm0
+
+; Without it the load stays folded into the conversion.
+; NO: t1:
+; NO: cvtss2sd (%rdi), %xmm0
+  %0 = load float* %x, align 4
+  %1 = fpext float %0 to double
+  ret double %1
+}
+
+define float @t2(double* nocapture %x) nounwind readonly ssp {
+entry:
+; Same check for the double-to-float direction (cvtsd2ss).
+; YES: t2:
+; YES: movsd (%rdi), %xmm0
+; YES: cvtsd2ss %xmm0, %xmm0
+
+; NO: t2:
+; NO: cvtsd2ss (%rdi), %xmm0
+  %0 = load double* %x, align 8
+  %1 = fptrunc double %0 to float
+  ret float %1
+}