author    Evan Cheng <evan.cheng@apple.com>  2010-10-07 20:50:20 +0000
committer Evan Cheng <evan.cheng@apple.com>  2010-10-07 20:50:20 +0000
commit    835580fc3ada684cd30e21c1367ea880d2237a0f
tree      99b9ce63b2a299664cbbe0e1ca6c5c9d9d71d24f
parent    9f36c4e1562c3ad6bf9faac496446970d86f7cc4
Canonicalize X86ISD::MOVDDUP nodes to v2f64 to make sure all cases match. Also eliminate unneeded isel patterns. rdar://8520311
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115977 91177308-0d34-0410-b5e6-96231b3b80d8
 lib/Target/X86/X86ISelLowering.cpp | 16
 lib/Target/X86/X86InstrSSE.td      | 22
 test/CodeGen/X86/sse3.ll           | 17
 3 files changed, 38 insertions, 17 deletions
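The core of the change is a small lowering helper: instead of emitting X86ISD::MOVDDUP directly on whatever 128-bit vector type the shuffle happens to have, lowering now bitcasts the operand to a single canonical type (v2f64), builds the target shuffle node there, and bitcasts the result back, so the isel patterns only ever have to match the v2f64 form. A minimal sketch of that helper, written against the same SelectionDAG API the patch below already uses (ISD::BIT_CONVERT, getTargetShuffleNode); it mirrors the added code and is not meant as a drop-in replacement for the hunk itself:

    // Sketch: canonicalize a MOVDDUP to v2f64 by sandwiching the target
    // shuffle node between two bitcasts.
    static SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1,
                              SelectionDAG &DAG) {
      EVT VT = Op.getValueType();   // original vector type, e.g. v4f32 or v2i64
      // Bitcast the operand to the canonical v2f64 type.
      V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
      SDValue Dup = getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64, V1, DAG);
      // Bitcast back so the rest of the DAG still sees the original type.
      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Dup);
    }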
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 90ef6df..66f9612 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5083,6 +5083,7 @@ static bool MayFoldVectorLoad(SDValue V) {
// uses while it only has one, use this version, and let isel match
// another instruction if the load really happens to have more than
// one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
static bool RelaxedMayFoldVectorLoad(SDValue V) {
if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
V = V.getOperand(0);
@@ -5170,6 +5171,17 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
}
static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonicalize to v2f64.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
+static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
@@ -5309,7 +5321,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (VT.getVectorNumElements() <= 4)
return SDValue();
- // Canonize all of the remaining to v4f32.
+ // Canonicalize all of the remaining to v4f32.
return PromoteSplat(SVOp, DAG);
}
@@ -5394,7 +5406,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
RelaxedMayFoldVectorLoad(V1))
- return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+ return getMOVDDup(Op, dl, V1, DAG);
if (X86::isMOVHLPS_v_undef_Mask(SVOp))
return getMOVHighToLow(Op, dl, DAG);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ced3bb9..c37def1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5537,19 +5537,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5564,6 +5559,7 @@ def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(MOVDDUPrm addr:$src)>;
+
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5675,14 +5671,11 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
-// FIXME: Instead of X86Movddup, there should be a X86Movlhps here, the problem
+
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
// is during lowering, where it's not possible to recognize the load fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
-def : Pat<(v2i64 (X86Movddup VR128:$src)),
- (MOVLHPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v4f32 (X86Movddup VR128:$src)),
- (MOVLHPSrr VR128:$src, VR128:$src)>;
def : Pat<(v2f64 (X86Movddup VR128:$src)),
(UNPCKLPDrr VR128:$src, VR128:$src)>;
@@ -5690,6 +5683,7 @@ def : Pat<(v2f64 (X86Movddup VR128:$src)),
def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+
// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold because
// it has two uses through a bitcast. One use disappears at isel time and the
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 206cdff..9a60091 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -169,7 +169,7 @@ define internal void @t10() nounwind {
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
-; X64: movlhps %xmm1, %xmm1
+; X64: unpcklpd %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
; X64: pextrw $6, %xmm0, %eax
@@ -260,3 +260,18 @@ entry:
; X64: pinsrw $1, %eax, %xmm0
; X64: ret
}
+
+; rdar://8520311
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64: movddup (%rax), %xmm0
+ %tmp1 = load <4 x float>* undef, align 16
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %tmp3 = load <4 x float>* undef, align 16
+ %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+ %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+ ret <4 x i32> %tmp7
+}