aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2011-04-17 06:35:44 +0000
committerChris Lattner <sabre@nondot.org>2011-04-17 06:35:44 +0000
commitfd3f6351035f6bf1a6bfc851da00c0fb24d6db09 (patch)
tree64265d6fb63de8be21e7ab616fa988f0b4face05 /test/CodeGen
parent8bdc251dc5392fdf4854dcf588d73927d6ef64b3 (diff)
downloadexternal_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.zip
external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.gz
external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.bz2
Fix rdar://9289512 - not folding load into compare at -O0
The basic issue here is that bottom-up isel is matching the branch and compare, and was failing to fold the load into the branch/compare combo. Fixing this (by allowing folding into any instruction of a sequence that is selected) allows us to produce things like: cmpb $0, 52(%rax) je LBB4_2 instead of: movb 52(%rax), %cl cmpb $0, %cl je LBB4_2 This makes the generated -O0 code run a bit faster, but also speeds up compile time by putting less pressure on the register allocator and generating less code. This was one of the biggest classes of missing load folding. Implementing this shrinks 176.gcc's c-decl.s (as a random example) by about 4% in (verbose-asm) line count. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129656 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r--test/CodeGen/X86/fast-isel-x86-64.ll23
1 files changed, 22 insertions, 1 deletions
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index b2d1263..6137b48 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
@@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp {
; CHECK: test1:
; CHECK: andl $8,
+
+
+; rdar://9289512 - The load should fold into the compare.
+define void @test2(i64 %x) nounwind ssp {
+entry:
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %tmp = load i64* %x.addr, align 8
+ %cmp = icmp sgt i64 %tmp, 42
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; CHECK: test2:
+; CHECK: movq %rdi, -8(%rsp)
+; CHECK: cmpq $42, -8(%rsp)