 lib/Target/X86/X86ISelLowering.cpp | 32 ++++++++++++++++++++++++++++++++
 test/CodeGen/X86/mmx-copy-gprs.ll  | 14 ++++++++++++++
 2 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b5e91ce..96a58c1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -704,6 +704,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::SELECT);
+  setTargetDAGCombine(ISD::STORE);
 
   computeRegisterProperties();
@@ -5872,6 +5873,35 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
   return SDOperand();
 }
 
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
+                                     const X86Subtarget *Subtarget) {
+  // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+  // the FP state in cases where an emms may be missing.
+  if (MVT::isVector(St->getValue().getValueType()) &&
+      MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
+      // Must be a store of a load.
+      isa<LoadSDNode>(St->getChain()) &&
+      St->getChain().Val == St->getValue().Val &&
+      St->getValue().hasOneUse() && St->getChain().hasOneUse() &&
+      !St->isVolatile() && !cast<LoadSDNode>(St->getChain())->isVolatile()) {
+    LoadSDNode *Ld = cast<LoadSDNode>(St->getChain());
+
+    // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+    if (Subtarget->is64Bit()) {
+      SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(), Ld->getBasePtr(),
+                                    Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                                    Ld->isVolatile(), Ld->getAlignment());
+      return DAG.getStore(NewLd.getValue(1), NewLd, St->getBasePtr(),
+                          St->getSrcValue(), St->getSrcValueOffset(),
+                          St->isVolatile(), St->getAlignment());
+    }
+
+    // TODO: 2 32-bit copies.
+  }
+  return SDOperand();
+}
+
 /// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
 /// X86ISD::FXOR nodes.
 static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
@@ -5908,6 +5938,8 @@ SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
+  case ISD::STORE:
+    return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
   case X86ISD::FXOR:
   case X86ISD::FOR:         return PerformFORCombine(N, DAG);
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
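
The "TODO: 2 32-bit copies" above marks the lowering that is still missing for
32-bit targets, where the 64-bit copy would be split into two i32 load/store
pairs whose chains are merged with a TokenFactor. The following is a
hypothetical sketch of that path, not code from this commit; it assumes the
same SDOperand-era SelectionDAG API used in the patch and would replace the
TODO inside the !is64Bit() case:

    // Hypothetical 32-bit lowering (not part of this commit): copy the
    // 64-bit value as two i32 load/store pairs so no MMX register is used.
    SDOperand LoAddr = Ld->getBasePtr();
    SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                                   DAG.getConstant(4, MVT::i32));

    // Load the low and high halves, threading the chain through both loads.
    SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
                                 Ld->isVolatile(), Ld->getAlignment());
    SDOperand HiLd = DAG.getLoad(MVT::i32, LoLd.getValue(1), HiAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                                 Ld->isVolatile(), Ld->getAlignment());

    // Store both halves to the destination and merge the store chains.
    SDOperand StLoAddr = St->getBasePtr();
    SDOperand StHiAddr = DAG.getNode(ISD::ADD, MVT::i32, StLoAddr,
                                     DAG.getConstant(4, MVT::i32));
    SDOperand LoSt = DAG.getStore(HiLd.getValue(1), LoLd, StLoAddr,
                                  St->getSrcValue(), St->getSrcValueOffset(),
                                  St->isVolatile(), St->getAlignment());
    SDOperand HiSt = DAG.getStore(LoSt, HiLd, StHiAddr,
                                  St->getSrcValue(), St->getSrcValueOffset()+4,
                                  St->isVolatile(), St->getAlignment());
    return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);

Chaining HiSt off LoSt serializes the two stores, which is the simple,
conservative choice; both stores touch distinct addresses, so they could
alternatively share a chain and be joined only by the TokenFactor.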
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
new file mode 100644
index 0000000..8cf36e0
--- /dev/null
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
+
+; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs
+; increases the number of places that need to use emms.
+
+; rdar://5741668
+target triple = "x86_64-apple-darwin8"
+
+define i32 @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
+entry:
+ %tmp1 = load <1 x i64>* %y, align 8 ; <<1 x i64>> [#uses=1]
+ store <1 x i64> %tmp1, <1 x i64>* %x, align 8
+ ret i32 undef
+}
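
A companion regression test for the eventual 32-bit path might look like the
sketch below. This is an assumption, not part of this commit: the i686 triple,
the function name, and the "not grep movq" check (asserting that the copy
avoids the MMX movq instruction) are all hypothetical.

    ; Hypothetical 32-bit companion test (not part of this commit).
    ; RUN: llvm-as < %s | llc -march=x86 | not grep movq
    target triple = "i686-apple-darwin8"

    define void @bar(<1 x i64>* %x, <1 x i64>* %y) nounwind {
    entry:
            %tmp1 = load <1 x i64>* %y, align 8
            store <1 x i64> %tmp1, <1 x i64>* %x, align 8
            ret void
    }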