diff options
author | Chris Lattner <sabre@nondot.org> | 2011-04-17 06:35:44 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-04-17 06:35:44 +0000 |
commit | fd3f6351035f6bf1a6bfc851da00c0fb24d6db09 (patch) | |
tree | 64265d6fb63de8be21e7ab616fa988f0b4face05 /test/CodeGen | |
parent | 8bdc251dc5392fdf4854dcf588d73927d6ef64b3 (diff) | |
download | external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.zip external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.gz external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.bz2 |
Fix rdar://9289512 - not folding load into compare at -O0
The basic issue here is that bottom-up isel matches the branch and
compare together, but was failing to fold the load into the branch/compare
combo. Fixing this (by allowing folding into any instruction of a
sequence that is selected) allows us to produce things like:
    cmpb $0, 52(%rax)
    je LBB4_2
instead of:
    movb 52(%rax), %cl
    cmpb $0, %cl
    je LBB4_2
This makes the generated -O0 code run a bit faster, but also speeds up
compile time by putting less pressure on the register allocator and
generating less code.
This was one of the biggest classes of missing load folding. Implementing
this shrinks the (verbose-asm) line count of 176.gcc's c-decl.s (as a
random example) by about 4%.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129656 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/fast-isel-x86-64.ll | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index b2d1263..6137b48 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
@@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp {
 
 ; CHECK: test1:
 ; CHECK: andl $8,
+
+
+; rdar://9289512 - The load should fold into the compare.
+define void @test2(i64 %x) nounwind ssp {
+entry:
+  %x.addr = alloca i64, align 8
+  store i64 %x, i64* %x.addr, align 8
+  %tmp = load i64* %x.addr, align 8
+  %cmp = icmp sgt i64 %tmp, 42
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK: test2:
+; CHECK: movq %rdi, -8(%rsp)
+; CHECK: cmpq $42, -8(%rsp)