From 46df4eb46e784036cf895db271fe29e1cf2a975a Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 16 Jun 2010 07:35:02 +0000 Subject: Make post-ra scheduling, anti-dep breaking, and register scavenger (conservatively) aware of predicated instructions. This enables ARM to move if-conversion before post-ra scheduler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 64 +++++++---- lib/CodeGen/AggressiveAntiDepBreaker.h | 1 + lib/CodeGen/CriticalAntiDepBreaker.cpp | 137 +++++++++++++++--------- lib/CodeGen/CriticalAntiDepBreaker.h | 5 +- lib/CodeGen/IfConversion.cpp | 128 +++++++++++++++++++--- lib/CodeGen/PostRAHazardRecognizer.cpp | 2 +- lib/CodeGen/RegisterScavenging.cpp | 8 +- lib/Target/ARM/ARMISelLowering.cpp | 7 +- lib/Target/ARM/ARMTargetMachine.cpp | 13 ++- test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll | 2 + test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 2 +- 11 files changed, 274 insertions(+), 95 deletions(-) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 727e8f9..7d923b1 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL) { @@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) { + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, std::multimap& RegRefs = State->GetRegRefs(); + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // for the register. HandleLastUse(Reg, Count, "(last-use)"); - // If MI's uses have special allocation requirement, don't allow - // any use registers to be changed. Also assume all registers - // used in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) { + if (Special) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 506d43e..91ebb85 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -115,6 +115,7 @@ namespace llvm { class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 42cc448..e3746a9 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -29,6 +30,7 @@ CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)) { @@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) { } void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register operands for this instruction and update // Classes and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); - // It's not safe to change register allocation for source operands of - // that have special allocation requirements. - if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (MO.isUse() && Special) { if (KeepRegs.insert(Reg)) { for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) @@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Update liveness. // Proceding upwards, registers that are defed but not used in this // instruction are now dead. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); - Classes[Reg] = 0; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; - KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); - Classes[SubregReg] = 0; - RegRefs.erase(SubregReg); - } - // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast(-1); + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast(-1); + } } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -478,7 +505,11 @@ BreakAntiDependencies(const std::vector& SUnits, PrescanInstruction(MI); - if (MI->getDesc().hasExtraDefRegAllocReq()) + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; @@ -490,7 +521,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index cc42dd2..5406300 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -22,15 +22,18 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include namespace llvm { +class TargetInstrInfo; +class TargetRegisterInfo; + class CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 41734dd..710a9f1 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -47,6 +48,8 @@ static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); static cl::opt DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt IfCvtBranchFold("ifcvt-branch-fold", + cl::init(true), cl::Hidden); STATISTIC(NumSimple, "Number of simple if-conversions performed"); STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); @@ -146,6 +149,7 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; bool MadeChange; int FnNum; public: @@ -176,9 +180,11 @@ namespace { unsigned NumDups1, unsigned NumDups2); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond); + SmallVectorImpl &Cond, + SmallSet &Redefs); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); @@ -226,6 +232,7 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); if (!TII) return false; DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" @@ -362,7 +369,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); - if (MadeChange) { + if (MadeChange && !IfCvtBranchFold) { BranchFolder BF(false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), @@ -823,12 +830,17 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator PI = BB; + MachineFunction::iterator I = llvm::next(PI); MachineFunction::iterator TI = ToBB; MachineFunction::iterator E = BB->getParent()->end(); - while (++I != TI) - if (I == E || !I->empty()) + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(I)) return false; + PI = I++; + } return true; } @@ -863,6 +875,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } +/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are +/// modeled as read + write (sort like two-address instructions). These +/// routines track register liveness and add implicit uses to if-converted +/// instructions to conform to the model. +static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Redefs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Redefs.insert(*Subreg); + } +} + +static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, + const TargetRegisterInfo *TRI, + bool AddImpUse = false) { + SmallVector Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Defs.push_back(Reg); + else if (MO.isKill()) { + Redefs.erase(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.erase(*SR); + } + } + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (Redefs.count(Reg)) { + if (AddImpUse) + // Treat predicated update as read + write. + MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/,false/*IsKill*/)); + } else { + Redefs.insert(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.insert(*SR); + } + } +} + +static void UpdatePredRedefs(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + while (I != E) { + UpdatePredRedefs(I, Redefs, TRI); + ++I; + } +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -887,13 +959,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (TII->ReverseBranchCondition(Cond)) assert(false && "Unable to reverse branch condition!"); + // Initialize liveins to the first BB. These are potentiall re-defined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -971,17 +1049,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } } + // Initialize liveins to the first BB. These are potentiall re-defined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + bool HasEarlyExit = CvtBBI->FalseBB != NULL; bool DupBB = CvtBBI->BB->pred_size() > 1; if (DupBB) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1085,6 +1169,11 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Remove the conditional branch from entry to the blocks. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Initialize liveins to the first BB. These are potentiall re-defined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); @@ -1102,6 +1191,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, ++DI2; --NumDups1; } + + UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1118,7 +1209,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, ++i; } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); // Predicate the 'false' block. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); @@ -1132,7 +1223,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, if (!DI2->isDebugValue()) --NumDups2; } - PredicateBlock(*BBI2, DI2, *Cond2); + PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1); @@ -1168,7 +1259,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, /// specified end with the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond) { + SmallVectorImpl &Cond, + SmallSet &Redefs) { for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; @@ -1178,6 +1270,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #endif llvm_unreachable(0); } + + // If the predicated instruction now re-defines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs, TRI, true); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1192,6 +1288,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, /// the destination block. Skip end of block branches if IgnoreBr is true. void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); @@ -1207,13 +1304,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - if (!isPredicated && !MI->isDebugValue()) + if (!isPredicated && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } + } + + // If the predicated instruction now re-defines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); } std::vector Succs(FromBBI.BB->succ_begin(), diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 3690546..cbde2b0 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -72,7 +72,7 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const { } } -PostRAHazardRecognizer::HazardType +ScheduleHazardRecognizer::HazardType PostRAHazardRecognizer::getHazardType(SUnit *SU) { if (ItinData.isEmpty()) return NoHazard; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index b3e7975..3eefeda 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -141,6 +141,10 @@ void RegScavenger::forward() { // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); BitVector EarlyClobberRegs(NumPhysRegs); BitVector KillRegs(NumPhysRegs); BitVector DefRegs(NumPhysRegs); @@ -155,11 +159,11 @@ void RegScavenger::forward() { if (MO.isUse()) { // Two-address operands implicitly kill. - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); - if (MO.isDead()) + if (!isPred && MO.isDead()) addRegWithSubRegs(DeadRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 1a35f6f..78ce2f0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -62,6 +62,11 @@ EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions."), cl::init(false)); +static cl::opt +ARMInterworking("arm-interworking", cl::Hidden, + cl::desc("Enable / disable ARM interworking (for debugging only)"), + cl::init(true)); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -1188,7 +1193,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && !isExt; + isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4ec26db..2101da8 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -27,6 +27,11 @@ EarlyITBlockFormation("thumb2-early-it-blocks", cl::Hidden, cl::desc("Form IT blocks early before register allocation"), cl::init(false)); +static cl::opt +EarlyIfConvert("arm-early-if-convert", cl::Hidden, + cl::desc("Run if-conversion before post-ra scheduling"), + cl::init(false)); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -125,13 +130,17 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. PM.add(createARMExpandPseudoPass()); + if (EarlyIfConvert && OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createIfConverterPass()); + } + return true; } bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None) { + if (!EarlyIfConvert && OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); } diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index e3086a3..eb6d593 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -12,6 +12,8 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string< ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq ; CHECK-NEXT: subeq.w r0, r6, r8 +; CHECK-NEXT: %bb +; CHECK-NEXT: %bb1 ; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3] diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll index 4af492c..08e6f38 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll @@ -31,7 +31,7 @@ entry: ; CHECK: CountTree: ; CHECK: it eq ; CHECK: cmpeq -; CHECK: bne +; CHECK: beq ; CHECK: itt eq ; CHECK: moveq ; CHECK: popeq -- cgit v1.1