aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorManman Ren <mren@apple.com>2012-08-08 00:51:41 +0000
committerManman Ren <mren@apple.com>2012-08-08 00:51:41 +0000
commit39ad568c62f5120faec29f69d3d614303a1f992d (patch)
tree4512b094b076453ca2fe06fa3a8a17fbe32f275b /lib
parent130e603115129ec1a6b94550a15f7caea0cbf068 (diff)
downloadexternal_llvm-39ad568c62f5120faec29f69d3d614303a1f992d.zip
external_llvm-39ad568c62f5120faec29f69d3d614303a1f992d.tar.gz
external_llvm-39ad568c62f5120faec29f69d3d614303a1f992d.tar.bz2
X86: enable CSE between CMP and SUB
We perform the following: 1> Use SUB instead of CMP for i8,i16,i32 and i64 in ISel lowering. 2> Modify MachineCSE to correctly handle implicit defs. 3> Convert SUB back to CMP if possible at peephole. Removed pattern matching of (a>b) ? (a-b):0 and like, since they are handled by peephole now. rdar://11873276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161462 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/MachineCSE.cpp20
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp7
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp56
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td2
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp80
5 files changed, 116 insertions, 49 deletions
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 0a76f8d..5153abb 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -417,6 +417,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -486,15 +487,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned OldReg = MO.getReg();
unsigned NewReg = CSMI->getOperand(i).getReg();
- if (OldReg == NewReg)
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
continue;
+ }
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
@@ -526,6 +536,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->clearKillFlags(CSEPairs[i].second);
}
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -549,6 +564,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
Exps.push_back(MI);
}
CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
}
return Changed;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3160758..f6df4d3 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2438,7 +2438,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- case X86ISD::CMP: {
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d70cdb8..33bc1c9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8333,11 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
switch (Op.getNode()->getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB:
- // If the only use of SUB is EFLAGS, use CMP instead.
- if (Op.hasOneUse())
- Opcode = X86ISD::CMP;
- else
- Opcode = X86ISD::SUB;
+ Opcode = X86ISD::SUB;
break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
@@ -8391,6 +8387,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return EmitTest(Op0, X86CC, DAG);
DebugLoc dl = Op0.getDebugLoc();
+ if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
+ Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
+ Op0, Op1);
+ return SDValue(Sub.getNode(), 1);
+ }
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@@ -8765,46 +8769,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // Handle the following cases related to max and min:
- // (a > b) ? (a-b) : 0
- // (a >= b) ? (a-b) : 0
- // (b < a) ? (a-b) : 0
- // (b <= a) ? (a-b) : 0
- // Comparison is removed to use EFLAGS from SUB.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2))
- if (Cond.getOpcode() == X86ISD::SETCC &&
- Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
- (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) &&
- C->getAPIntValue() == 0) {
- SDValue Cmp = Cond.getOperand(1);
- unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
- if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) &&
- (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_A || CC == X86::COND_AE)) ||
- (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) &&
- (CC == X86::COND_L || CC == X86::COND_LE ||
- CC == X86::COND_B || CC == X86::COND_BE))) {
-
- if (Op1.getOpcode() == ISD::SUB) {
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32);
- SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs,
- Op1.getOperand(0), Op1.getOperand(1));
- DAG.ReplaceAllUsesWith(Op1, New);
- Op1 = New;
- }
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_L ||
- CC == X86::COND_LE) ? X86::COND_GE : X86::COND_AE;
- SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8),
- SDValue(Op1.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
- }
- }
-
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
@@ -8945,7 +8909,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
- if (Cond.getOpcode() == X86ISD::CMP) {
+ if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index b6ba68f..f790611 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
X86xor_flag, xor, 1, 0>;
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
X86add_flag, add, 1, 1>;
+let isCompare = 1 in {
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 0>;
+}
// Arithmetic.
let Uses = [EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b26c08f..831caaa 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3031,6 +3031,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ // A SUB can be used to perform comparison.
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
case X86::CMP64rr:
case X86::CMP32rr:
case X86::CMP16rr:
@@ -3139,6 +3170,55 @@ bool X86InstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const {
+ // Check whether we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
+ switch (CmpInstr->getOpcode()) {
+ default: break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+ // There is no use of the destination register, we can replace SUB with CMP.
+ switch (CmpInstr->getOpcode()) {
+ default: llvm_unreachable(0);
+ case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+ case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+ case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+ case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+ case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+ case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+ case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+ case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+ case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+ case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+ case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+ case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+ case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+ case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ }
+ CmpInstr->setDesc(get(NewOpcode));
+ CmpInstr->RemoveOperand(0);
+ // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
+ }
+
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;