From 80b09fe8bc1d2755ef9a6b03b8862a657db42f06 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 10 Apr 2008 02:32:10 +0000 Subject: Teach branch folding pass about implicit_def instructions. Unfortunately we can't just eliminate them since the register scavenger expects every register use to be defined. However, we can delete them when there are no intra-block uses. Carefully removing some implicit_defs enables more blocks to be optimized away. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 57 +++++++++++++++++++++++++++- test/CodeGen/X86/2007-06-14-branchfold.ll | 4 +- test/CodeGen/X86/2008-04-09-BranchFolding.ll | 48 +++++++++++++++++++++++ 3 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/2008-04-09-BranchFolding.ll diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index d9874b5..703addc 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -78,6 +79,7 @@ namespace { bool OptimizeBranches(MachineFunction &MF); void OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); bool CanFallThrough(MachineBasicBlock *CurBB); bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, @@ -117,10 +119,63 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MF->getBasicBlockList().erase(MBB); } +/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def +/// followed by terminators, and if the implicitly defined registers are not +/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1: +/// r0 = implicit_def +/// r1 = implicit_def +/// br +/// This block can be optimized away later if the implicit instructions are +/// removed. +bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { + SmallSet<unsigned, 4> ImpDefRegs; + MachineBasicBlock::iterator I = MBB->begin(); + while (I != MBB->end()) { + if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + break; + unsigned Reg = I->getOperand(0).getReg(); + ImpDefRegs.insert(Reg); + for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + ImpDefRegs.insert(SubReg); + ++I; + } + if (ImpDefRegs.empty()) + return false; + + MachineBasicBlock::iterator FirstTerm = I; + while (I != MBB->end()) { + if (!TII->isUnpredicatedTerminator(I)) + return false; + // See if it uses any of the implicitly defined registers. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (ImpDefRegs.count(Reg)) + return false; + } + ++I; + } + + I = MBB->begin(); + while (I != FirstTerm) { + MachineInstr *ImpDefMI = &*I; + ++I; + MBB->erase(ImpDefMI); + } + + return true; +} + bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); if (!TII) return false; + RegInfo = MF.getTarget().getRegisterInfo(); + // Fix CFG. The later algorithms expect it to be right. bool EverMadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { @@ -128,9 +183,9 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { std::vector<MachineOperand> Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + EverMadeChange |= OptimizeImpDefsBlock(MBB); } - RegInfo = MF.getTarget().getRegisterInfo(); RS = RegInfo->requiresRegisterScavenging(MF) ?
new RegScavenger() : NULL; MMI = getAnalysisToUpdate<MachineModuleInfo>(); diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll index 697d6a3..7756d06 100644 --- a/test/CodeGen/X86/2007-06-14-branchfold.ll +++ b/test/CodeGen/X86/2007-06-14-branchfold.ll @@ -1,8 +1,6 @@ -; RUN: llvm-as < %s | llc -mcpu=i686 | not grep jmp +; RUN: llvm-as < %s | llc -march=x86 -mcpu=i686 | not grep jmp ; check that branch folding understands FP_REG_KILL is not a branch -; ModuleID = 'g.bc' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-pc-linux-gnu" %struct.FRAME.c34003a = type { float, float } @report_E = global i8 0 ; <i8*> [#uses=0] diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll new file mode 100644 index 0000000..fea54c4 --- /dev/null +++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as < %s | llc -march=x86 | not grep jmp + + %struct..0anon = type { i32 } + %struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* } + %struct.lang_decl = type opaque + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } + %struct.tree_decl = type { [12 x i8], i8*, i32, %struct.tree_node*, i32, i8, i8, i8, i8, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct..0anon, { %struct.rtx_def* }, %struct.tree_node*, %struct.lang_decl* } + %struct.tree_node = type { %struct.tree_decl } + +define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind { +entry: + %tmp3.i40 = icmp eq %struct.binding_level* null, null ; <i1> [#uses=2] + br i1 false, label %bb143, label %bb140 +bb140: ;
preds = %entry + br i1 %tmp3.i40, label %bb160, label %bb17.i +bb17.i: ; preds = %bb140 + ret %struct.tree_node* null +bb143: ; preds = %entry + %tmp8.i43 = load %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1] + br i1 %tmp3.i40, label %bb160, label %bb9.i48 +bb9.i48: ; preds = %bb143 + ret %struct.tree_node* null +bb160: ; preds = %bb143, %bb140 + %t.0.reg2mem.0 = phi %struct.tree_node* [ null, %bb140 ], [ %tmp8.i43, %bb143 ] ; <%struct.tree_node*> [#uses=1] + %tmp162 = icmp eq %struct.tree_node* %t.0.reg2mem.0, null ; <i1> [#uses=2] + br i1 %tmp162, label %bb174, label %bb165 +bb165: ; preds = %bb160 + br label %bb174 +bb174: ; preds = %bb165, %bb160 + %line.0 = phi i32 [ 0, %bb165 ], [ undef, %bb160 ] ; <i32> [#uses=1] + %file.0 = phi i8* [ null, %bb165 ], [ undef, %bb160 ] ; <i8*> [#uses=1] + br i1 %tmp162, label %bb344, label %bb73.i +bb73.i: ; preds = %bb174 + br i1 false, label %bb226.i, label %bb220.i +bb220.i: ; preds = %bb73.i + ret %struct.tree_node* null +bb226.i: ; preds = %bb73.i + br i1 false, label %bb260, label %bb273.i +bb273.i: ; preds = %bb226.i + ret %struct.tree_node* null +bb260: ; preds = %bb226.i + tail call void (i8*, i32, ...)* @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind + ret %struct.tree_node* null +bb344: ; preds = %bb174 + ret %struct.tree_node* null +} + +declare void @pedwarn_with_file_and_line(i8*, i32, ...) nounwind -- cgit v1.1