diff options
author | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
commit | beac75da3784929aee9f0357fc5cd76d49d6c3d7 (patch) | |
tree | 54525601bcdb388c49b9d014c90e6cbe8dbc1467 /test | |
parent | 07c2b7ff685523458f92339facfd814689e55873 (diff) | |
download | external_llvm-beac75da3784929aee9f0357fc5cd76d49d6c3d7.zip external_llvm-beac75da3784929aee9f0357fc5cd76d49d6c3d7.tar.gz external_llvm-beac75da3784929aee9f0357fc5cd76d49d6c3d7.tar.bz2 |
implement rdar://6653118 - fastisel should fold loads where possible.
Since mem2reg isn't run at -O0, we get a ton of reloads from the stack,
for example, before, this code:
int foo(int x, int y, int z) {
return x+y+z;
}
used to compile into:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
movl 4(%rsp), %esi
addl %edx, %esi
movl (%rsp), %edx
addl %esi, %edx
movl %edx, %eax
addq $12, %rsp
ret
Now we produce:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
addl 4(%rsp), %edx ## Folded load
addl (%rsp), %edx ## Folded load
movl %edx, %eax
addq $12, %rsp
ret
Fewer instructions and less register use = faster compiles.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/fast-isel-mem.ll | 18 |
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 35ec1e7..8db1936 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN:   grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN:   grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s

 @src = external global i32

+; rdar://6653118
 define i32 @loadgv() nounwind {
 entry:
 	%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
 	%2 = add i32 %0, %1
 	store i32 %2, i32* @src
 	ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: movl L_src$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+; CHECK: addl (%ecx), %eax
+; CHECK: movl %eax, (%ecx)
+; CHECK: ret
+
 }

 %stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
 entry:
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
 	ret void
+; CHECK: _t:
+; CHECK: movl $0, %eax
+; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+
 }