aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-04 00:09:58 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-04 00:09:58 +0000
commit59bde4d8a1065d3c48c5ea94a570b2c4d2f294a8 (patch)
treef600a6525ac42bdb4b175f04c7f0f46375483655
parent33242fd3ed5586091e73254b58dd1825e9d53c60 (diff)
downloadexternal_llvm-59bde4d8a1065d3c48c5ea94a570b2c4d2f294a8.zip
external_llvm-59bde4d8a1065d3c48c5ea94a570b2c4d2f294a8.tar.gz
external_llvm-59bde4d8a1065d3c48c5ea94a570b2c4d2f294a8.tar.bz2
Add early if-conversion support to X86.
Implement the TII hooks needed by EarlyIfConversion to create cmov instructions and estimate their latency. Early if-conversion is still not enabled by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159695 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp80
-rw-r--r--lib/Target/X86/X86InstrInfo.h8
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp7
-rw-r--r--test/CodeGen/X86/early-ifcvt.ll39
4 files changed, 133 insertions, 1 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 041302b..ca49d95 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2295,6 +2295,37 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
}
+/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and
+/// register size in bytes.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) {
+ static const unsigned Opc[16][3] = {
+ { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
+ { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+ { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
+ { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+ { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
+ { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
+ { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+ { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
+ { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+ { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+ { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+ { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+ { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
+ { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
+ { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ switch(RegBytes) {
+ default: llvm_unreachable("Illegal register size!");
+ case 2: return Opc[CC][0];
+ case 4: return Opc[CC][1];
+ case 8: return Opc[CC][2];
+ }
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
@@ -2519,6 +2550,55 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ // Not all subtargets have cmov instructions.
+ if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ return false;
+ if (Cond.size() != 1)
+ return false;
+ // We cannot do the composite conditions, at least not in SSA form.
+ if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
+ if (X86::GR16RegClass.hasSubClassEq(RC) ||
+ X86::GR32RegClass.hasSubClassEq(RC) ||
+ X86::GR64RegClass.hasSubClassEq(RC)) {
+ // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+ // Bridge. Probably Ivy Bridge as well.
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
+ return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(Cond.size() == 1 && "Invalid Cond array");
+ unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+ MRI.getRegClass(DstReg)->getSize());
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
+
/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4006dad..2009668 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -219,6 +219,14 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const;
+ virtual void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c66a554..b7ba568 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -140,7 +140,12 @@ public:
} // namespace
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new X86PassConfig(this, PM);
+ X86PassConfig *PC = new X86PassConfig(this, PM);
+
+ if (Subtarget.hasCMov())
+ PC->enablePass(&EarlyIfConverterID);
+
+ return PC;
}
bool X86PassConfig::addInstSelector() {
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
new file mode 100644
index 0000000..ce1fe04
--- /dev/null
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: mm2
+define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ br label %do.body
+
+; CHECK: do.body
+; Loop body has no branches before the backedge.
+; CHECK-NOT: LBB
+do.body:
+ %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
+ %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
+ %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
+ %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
+ %0 = load i32* %p.addr.0, align 4
+ %cmp = icmp sgt i32 %0, %max.0
+ br i1 %cmp, label %do.cond, label %if.else
+
+if.else:
+ %cmp1 = icmp slt i32 %0, %min.0
+ %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
+ br label %do.cond
+
+do.cond:
+ %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
+ %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
+; CHECK: decl %esi
+; CHECK: jne LBB
+ %dec = add i32 %n.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end:
+ %sub = sub nsw i32 %max.1, %min.1
+ ret i32 %sub
+}