diff options
author | Chris Lattner <sabre@nondot.org> | 2011-04-17 06:35:44 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-04-17 06:35:44 +0000 |
commit | fd3f6351035f6bf1a6bfc851da00c0fb24d6db09 (patch) | |
tree | 64265d6fb63de8be21e7ab616fa988f0b4face05 | |
parent | 8bdc251dc5392fdf4854dcf588d73927d6ef64b3 (diff) | |
download | external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.zip external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.gz external_llvm-fd3f6351035f6bf1a6bfc851da00c0fb24d6db09.tar.bz2 |
Fix rdar://9289512 - not folding load into compare at -O0
The basic issue here is that bottom-up isel is matching the branch
and compare, and was failing to fold the load into the branch/compare
combo. Fixing this (by allowing folding into any instruction of a
sequence that is selected) allows us to produce things like:
cmpb $0, 52(%rax)
je LBB4_2
instead of:
movb 52(%rax), %cl
cmpb $0, %cl
je LBB4_2
This makes the generated -O0 code run a bit faster, but also speeds up
compile time by putting less pressure on the register allocator and
generating less code.
This was one of the biggest classes of missing load folding. Implementing
this shrinks 176.gcc's c-decl.s (as a random example) by about 4% in (verbose-asm)
line count.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129656 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/CodeGen/SelectionDAGISel.h | 3 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 56 | ||||
-rw-r--r-- | test/CodeGen/X86/fast-isel-x86-64.ll | 23 |
3 files changed, 65 insertions, 17 deletions
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index e0cfabd..ecf3947 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -280,7 +280,8 @@ private: void PrepareEHLandingPad(); void SelectAllBasicBlocks(const Function &Fn); - bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS); + bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst, + FastISel *FastIS); void FinishBasicBlock(); void SelectBasicBlock(BasicBlock::const_iterator Begin, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2ff46aa..c888b5e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -748,9 +748,31 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + SmallPtrSet<const Instruction*, 4> FoldedInsts; + for (BasicBlock::const_iterator II = FoldInst; &*II != LI; --II) + FoldedInsts.insert(II); + + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. + if (!FoldedInsts.count(LI->use_back())) + return false; + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; @@ -835,10 +857,10 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) { /// Return false if it needs to be emitted. static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { - return !I->mayWriteToMemory() && - !isa<TerminatorInst>(I) && - !isa<DbgInfoIntrinsic>(I) && - !FuncInfo->isExportedInst(I); + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { @@ -930,16 +952,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index b2d1263..6137b48 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp { ; CHECK: test1: ; CHECK: andl $8, + + +; rdar://9289512 - The load should fold into the compare. +define void @test2(i64 %x) nounwind ssp { +entry: + %x.addr = alloca i64, align 8 + store i64 %x, i64* %x.addr, align 8 + %tmp = load i64* %x.addr, align 8 + %cmp = icmp sgt i64 %tmp, 42 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; CHECK: test2: +; CHECK: movq %rdi, -8(%rsp) +; CHECK: cmpq $42, -8(%rsp) |