aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Target/TargetInstrInfo.h18
-rw-r--r--lib/CodeGen/BranchFolding.cpp18
-rw-r--r--lib/CodeGen/IfConversion.cpp7
-rw-r--r--lib/CodeGen/MachineVerifier.cpp3
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp28
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp42
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp28
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h4
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td1
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h12
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td7
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp123
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp59
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h9
-rw-r--r--test/CodeGen/ARM/ifcvt2.ll15
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll1
18 files changed, 302 insertions, 82 deletions
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 21675dc..90608f1 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -321,6 +321,12 @@ public:
assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!");
return 0;
}
+
+ /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+ /// after it, replacing it with an unconditional branch to NewDest. This is
+ /// used by the tail merging pass.
+ virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const = 0;
/// copyRegToReg - Emit instructions to copy between a pair of registers. It
/// returns false if the target does not how to copy between the specified
@@ -562,6 +568,13 @@ public:
return true;
}
+ /// isSchedulingBoundary - Test if the given instruction should be
+ /// considered a scheduling boundary. This primarily includes labels and
+ /// terminators.
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const = 0;
+
/// GetInstSize - Returns the size of the specified Instruction.
///
virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const {
@@ -595,6 +608,8 @@ protected:
TargetInstrInfoImpl(const TargetInstrDesc *desc, unsigned NumOpcodes)
: TargetInstrInfo(desc, NumOpcodes) {}
public:
+ virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) const;
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const;
virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
@@ -610,6 +625,9 @@ public:
MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
virtual ScheduleHazardRecognizer *
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index e440e40..e17e47a 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -358,24 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest. This
-/// returns true if OldInst's block is modified, false if NewDest is modified.
+/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
- MachineBasicBlock *OldBB = OldInst->getParent();
-
- // Remove all the old successors of OldBB from the CFG.
- while (!OldBB->succ_empty())
- OldBB->removeSuccessor(OldBB->succ_begin());
-
- // Remove all the dead instructions from the end of OldBB.
- OldBB->erase(OldInst, OldBB->end());
-
- // If OldBB isn't immediately before OldBB, insert a branch to it.
- if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
- TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
- OldInst->getDebugLoc());
- OldBB->addSuccessor(NewDest);
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
++NumTailMerge;
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 36e6fc7..16d3247 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -235,6 +235,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
if (!TII) return false;
+ // Tail merge tend to expose more if-conversion opportunities.
+ BranchFolder BF(true);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
@@ -376,6 +382,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
getAnalysisIfAvailable<MachineModuleInfo>());
}
+ MadeChange |= BFChange;
return MadeChange;
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8baf01c..2297c90 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f3c725d..4af8e07 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -79,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
+ const TargetInstrInfo *TII;
CodeGenOpt::Level OptLevel;
public:
@@ -181,30 +182,9 @@ namespace {
};
}
-/// isSchedulingBoundary - Test if the given instruction should be
-/// considered a scheduling boundary. This primarily includes labels
-/// and terminators.
-///
-static bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineFunction &MF) {
- // Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
- return true;
-
- // Don't attempt to schedule around any instruction that defines
- // a stack-oriented pointer, as it's unlikely to be profitable. This
- // saves compile time, because it doesn't require every single
- // stack slot reference to depend on the instruction that does the
- // modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
- return true;
-
- return false;
-}
-
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
AA = &getAnalysis<AliasAnalysis>();
+ TII = Fn.getTarget().getInstrInfo();
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -265,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator Current = MBB->end();
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
- MachineInstr *MI = prior(I);
- if (isSchedulingBoundary(MI, Fn)) {
+ MachineInstr *MI = llvm::prior(I);
+ if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
Scheduler.Run(MBB, I, Current, CurrentCount);
Scheduler.EmitSchedule();
Current = MI;
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 1641c71..53f3ee8 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -27,6 +28,25 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before MBB, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
// commuteInstruction - The default implementation of this method just exchanges
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
@@ -316,6 +336,28 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return true;
}
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const{
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6eb3fab..8d3ad80 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1306,6 +1306,34 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ if (++I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index fec26e1..bc82e14 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -319,6 +319,10 @@ public:
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
};
static inline
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 0172bb4..c2cb05e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2528,6 +2528,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// IT block
+let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, Size2Bytes, IIC_iALUx,
"it$mask\t$cc", "", []> {
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 0134276..7e57a1c 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -97,7 +100,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -108,7 +112,8 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -229,6 +234,9 @@ public:
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b9417e3..e41d9fc 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -197,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-
-def FPSCR : ARMReg<1, "fpscr">;
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
// Register classes.
//
@@ -557,4 +557,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 2101da8..06c893a 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -131,8 +131,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
PM.add(createARMExpandPseudoPass());
if (EarlyIfConvert && OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
+ if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
+ if (Subtarget.isThumb2())
+ PM.add(createThumb2ITBlockPass());
}
return true;
@@ -146,7 +148,8 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
}
if (Subtarget.isThumb2()) {
- PM.add(createThumb2ITBlockPass());
+ if (!EarlyIfConvert)
+ PM.add(createThumb2ITBlockPass());
PM.add(createThumb2SizeReductionPass());
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index bd8be96..d72bb5d 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -31,6 +31,7 @@ namespace {
MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -52,6 +53,10 @@ namespace {
SmallVector<MachineInstr*,4> &LastUses);
bool InsertITBlock(MachineInstr *First, MachineInstr *Last);
bool InsertITBlocks(MachineBasicBlock &MBB);
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
@@ -249,20 +254,77 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
return Modified;
}
-static void TrackDefUses(MachineInstr *MI, SmallSet<unsigned, 4> &Defs,
- SmallSet<unsigned, 4> &Uses) {
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg)
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
continue;
- if (MO.isDef())
- Defs.insert(Reg);
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
else
- Uses.insert(Reg);
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+ "Sub-register indices still around?");
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
}
+ return false;
}
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
@@ -283,15 +345,21 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
Defs.clear();
Uses.clear();
- TrackDefUses(MI, Defs, Uses);
+ TrackDefUses(MI, Defs, Uses, TRI);
// Insert an IT instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
.addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
MachineBasicBlock::iterator InsertPos = MIB;
++MBBI;
- // Finalize IT mask.
+ // Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
// Branches, including tricky ones like LDM_RET, need to end an IT
@@ -306,35 +374,36 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC)
+ if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
- else {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
if (NCC == ARMCC::AL &&
- TII->isMoveInstr(*NMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
- "Sub-register indices still around?");
- // llvm models select's as two-address instructions. That means a copy
- // is inserted before a t2MOVccr, etc. If the copy is scheduled in
- // between selects we would end up creating multiple IT blocks.
- if (!Uses.count(DstReg) && !Defs.count(SrcReg)) {
- --MBBI;
- MBB.remove(NMI);
- MBB.insert(InsertPos, NMI);
- ++NumMovedInsts;
- continue;
- }
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
}
break;
}
- TrackDefUses(NMI, Defs, Uses);
+ TrackDefUses(NMI, Defs, Uses, TRI);
--Pos;
}
+ // Finalize IT mask.
Mask |= (1 << Pos);
// Tag along (firstcond[0] << 4) with the mask.
Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
Modified = true;
++NumITs;
}
@@ -346,6 +415,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
if (!AFI->isThumbFunction())
return false;
@@ -360,6 +430,9 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
Modified |= InsertITInstructions(MBB);
}
+ if (Modified && !PreRegAlloc)
+ AFI->setHasITBlocks(true);
+
return Modified;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index f78111f..866ffb1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,12 +17,13 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
+#include "Thumb2HazardRecognizer.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
-#include "Thumb2InstrInfo.h"
using namespace llvm;
@@ -35,6 +36,57 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Ctrl flow can reach here if branch folding is run before IT block
+ // formation pass.
+ }
+}
+
bool
Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
@@ -116,6 +168,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+ScheduleHazardRecognizer *Thumb2InstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+ return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 3f233c4..d5fc359 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
#include "Thumb2RegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
Thumb2RegisterInfo RI;
@@ -31,6 +32,9 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
@@ -60,6 +64,9 @@ public:
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
};
}
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index d9cac80..7b9d0cf 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep bxlt %t | count 1
-; RUN: grep bxgt %t | count 1
-; RUN: not grep bxge %t
-; RUN: not grep bxle %t
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: bxlt lr
%tmp2 = icmp sgt i32 %c, 10
%tmp5 = icmp slt i32 %d, 4
%tmp8 = or i1 %tmp5, %tmp2
@@ -21,6 +19,13 @@ UnifiedReturnBlock:
}
define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t2:
+; CHECK: bxgt lr
+; CHECK: cmp
+; CHECK: addge
+; CHECK: subge
+; CHECK-NOT: bxge lr
+; CHECK: bx lr
%tmp2 = icmp sgt i32 %c, 10
%tmp5 = icmp slt i32 %d, 4
%tmp8 = and i1 %tmp5, %tmp2
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 4f4c9af..10a4985 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -21,7 +21,7 @@ bb7: ; preds = %bb3
bb9: ; preds = %bb7
; CHECK: cmp r0, #0
-; CHECK-NEXT: cmp r0, #0
+; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz
%0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 4af492c..2c57348 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -32,6 +32,7 @@ entry:
; CHECK: it eq
; CHECK: cmpeq
; CHECK: bne
+; CHECK: cmp
; CHECK: itt eq
; CHECK: moveq
; CHECK: popeq