From 0eb3edea9cb6819334173a7d288da85943201fe5 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Sat, 28 Jul 2012 16:48:01 +0000 Subject: X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. rdar://10554090 and rdar://11873276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 91c33c4..0a4632a 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -78,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -441,6 +442,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet LocalMIs; SmallSet ImmDefRegs; DenseMap ImmDefMIs; + SmallSet FoldAsLoadDefRegs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,6 +450,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefRegs.clear(); bool First = true; MachineBasicBlock::iterator PMII; @@ -489,6 +492,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs, + DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + PMII = FoldMI; + MII = llvm::next(PMII); + continue; + } + First = false; PMII = MII; ++MII; -- cgit v1.1 From e8b4a4a9d173d67e35e4b1d32e20140381db6bde Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Sun, 29 Jul 2012 02:44:09 +0000 Subject: Revert r160920 and r160919 due to dragonegg and clang selfhost failure git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 0a4632a..91c33c4 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -78,7 +78,6 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); -STATISTIC(NumLoadFold, "Number of loads folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -442,7 +441,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet LocalMIs; SmallSet ImmDefRegs; DenseMap ImmDefMIs; - SmallSet FoldAsLoadDefRegs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -450,7 +448,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); - FoldAsLoadDefRegs.clear(); bool First = true; MachineBasicBlock::iterator PMII; @@ -492,25 +489,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } - MachineInstr *DefMI = 0; - MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs, - DefMI); - if (FoldMI) { - // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. - LocalMIs.erase(MI); - LocalMIs.erase(DefMI); - LocalMIs.insert(FoldMI); - MI->eraseFromParent(); - DefMI->eraseFromParent(); - ++NumLoadFold; - - // MI is replaced with FoldMI. - Changed = true; - PMII = FoldMI; - MII = llvm::next(PMII); - continue; - } - First = false; PMII = MII; ++MII; -- cgit v1.1 From d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 2 Aug 2012 00:56:42 +0000 Subject: X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161152 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 91c33c4..d9474bf 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -78,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -114,6 +115,7 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); + bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); }; } @@ -384,6 +386,29 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, return false; } +/// isLoadFoldable - Check whether MI is a candidate for folding into a later +/// instruction. We only fold loads to virtual registers and the virtual +/// register defined has a single use. +bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, + unsigned &FoldAsLoadDefReg) { + if (MI->canFoldAsLoad()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() == 1) { + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. + if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + } + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { @@ -441,6 +466,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet LocalMIs; SmallSet ImmDefRegs; DenseMap ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,6 +474,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; bool First = true; MachineBasicBlock::iterator PMII; @@ -456,12 +483,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { + FoldAsLoadDefReg = 0; ++MII; continue; } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; if (MI->isBitcast()) { if (optimizeBitcastInstr(MI, MBB)) { @@ -489,6 +521,31 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + PMII = FoldMI; + MII = llvm::next(PMII); + continue; + } + } First = false; PMII = MII; ++MII; -- cgit v1.1 From 127eea87d666ccc9fe7025f41148c33af0f8c84b Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 2 Aug 2012 19:37:32 +0000 Subject: X86 Peephole: fold loads to the source register operand if possible. Add more comments and use early returns to reduce nesting in isLoadFoldable. Also disable folding for V_SET0 to avoid introducing a const pool entry and a const pool load. rdar://10554090 and rdar://11873276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161207 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index d9474bf..6bc7e37 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -391,20 +391,21 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, /// register defined has a single use. bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg) { - if (MI->canFoldAsLoad()) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.getNumDefs() == 1) { - unsigned Reg = MI->getOperand(0).getReg(); - // To reduce compilation time, we check MRI->hasOneUse when inserting - // loads. It should be checked when processing uses of the load, since - // uses can be removed during peephole. - if (!MI->getOperand(0).getSubReg() && - TargetRegisterInfo::isVirtualRegister(Reg) && - MRI->hasOneUse(Reg)) { - FoldAsLoadDefReg = Reg; - return true; - } - } + if (!MI->canFoldAsLoad() || !MI->mayLoad()) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. + if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; } return false; } -- cgit v1.1 From f2c64ef519b38a4328809b27b4a3a8e0c26e9709 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 16 Aug 2012 23:11:47 +0000 Subject: Add an MCID::Select flag and TII hooks for optimizing selects. Select instructions pick one of two virtual registers based on a condition, like x86 cmov. On targets like ARM that support predication, selects can sometimes be eliminated by predicating the instruction defining one of the operands. Teach PeepholeOptimizer to recognize select instructions, and ask the target to optimize them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162059 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 6bc7e37..096df7b 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -79,6 +79,7 @@ STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -109,6 +110,7 @@ namespace { bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs); + bool optimizeSelect(MachineInstr *MI); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); @@ -386,6 +388,23 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, return false; } +/// Optimize a select instruction. +bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { + unsigned TrueOp = 0; + unsigned FalseOp = 0; + bool Optimizable = false; + SmallVector Cond; + if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) + return false; + if (!Optimizable) + return false; + if (!TII->optimizeSelect(MI)) + return false; + MI->eraseFromParent(); + ++NumSelects; + return true; +} + /// isLoadFoldable - Check whether MI is a candidate for folding into a later /// instruction. We only fold loads to virtual registers and the virtual /// register defined has a single use. @@ -496,22 +515,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; - if (MI->isBitcast()) { - if (optimizeBitcastInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } - } else if (MI->isCompare()) { - if (optimizeCmpInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { -- cgit v1.1 From cabc0699ea32cad78028a6533aef1e380064262e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 17 Aug 2012 14:38:59 +0000 Subject: Use standard pattern for iterate+erase. Increment the MBB iterator at the top of the loop to properly handle the current (and previous) instructions getting erased. This fixes PR13625. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162099 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'lib/CodeGen/PeepholeOptimizer.cpp') diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 096df7b..9099862 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -496,11 +496,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { ImmDefMIs.clear(); FoldAsLoadDefReg = 0; - bool First = true; - MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; LocalMIs.insert(MI); // If there exists an instruction which belongs to the following @@ -509,7 +509,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { FoldAsLoadDefReg = 0; - ++MII; continue; } if (MI->mayStore() || MI->isCall()) @@ -521,7 +520,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // MI is deleted. LocalMIs.erase(MI); Changed = true; - MII = First ? I->begin() : llvm::next(PMII); continue; } @@ -553,14 +551,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // MI is replaced with FoldMI. Changed = true; - PMII = FoldMI; - MII = llvm::next(PMII); continue; } } - First = false; - PMII = MII; - ++MII; } } -- cgit v1.1