aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/AsmPrinter/X86MCInstLower.cpp8
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp25
-rw-r--r--lib/Target/X86/X86Instr64bit.td22
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp14
-rw-r--r--lib/Target/X86/X86InstrInfo.td15
-rw-r--r--test/CodeGen/X86/remat-mov-0.ll1
6 files changed, 46 insertions, 39 deletions
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b69..a4939af 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -399,6 +399,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(X86::MOVZX32rm16);
lower_subreg32(&OutMI, 0);
break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
}
}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c795b62..e2a53d1 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1873,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned LoReg, HiReg, ClrReg;
unsigned ClrOpcode, SExtOpcode;
- EVT ClrVT = NVT;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
@@ -1883,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
@@ -1893,7 +1892,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
- ClrOpcode = ~0U; // NOT USED.
+ ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
@@ -1932,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode;
-
- if (NVT.getSimpleVT() == MVT::i64) {
- ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
- 0);
- // We just did a 32-bit clear, insert it into a 64-bit register to
- // clear the whole 64-bit reg.
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
- SDValue SubRegNo =
- CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
- ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
- MVT::i64, Zero, ClrNode, SubRegNo),
- 0);
- } else {
- ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
- }
-
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 7077cf9..d67a482 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
// Alias Instructions
//===----------------------------------------------------------------------===//
-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
- (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
-
-
-// Materialize i64 constant where top 32-bits are zero.
+let Defs = [EFLAGS],
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 52077cf..5ef3354 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1072,12 +1072,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
switch (Opc) {
default: break;
case X86::MOV8r0:
- case X86::MOV32r0: {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri; break;
}
Clone = false;
}
@@ -2344,8 +2348,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV32r0)
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2613,7 +2621,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
+ case X86::MOV16r0:
case X86::MOV32r0:
+ case X86::MOV64r0:
return true;
default: break;
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9b69018..aac9f38 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3734,18 +3734,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
"xor{l}\t$dst, $dst",
[(set GR32:$dst, 0)]>;
}
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
- (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
index 4c96cb4..c4f768c 100644
--- a/test/CodeGen/X86/remat-mov-0.ll
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4
-; XFAIL: *
; CodeGen should remat the zero instead of spilling it.