aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2008-09-25 20:50:48 +0000
committerEvan Cheng <evan.cheng@apple.com>2008-09-25 20:50:48 +0000
commit0b457f0c3a7e21b1fb9ac8b9f8e404e1312b6a60 (patch)
tree2ba33e7e3f6ad009c22409a680fc91cb58deb29b
parentc9c6da61ac027d9818652d417907f84398288b99 (diff)
downloadexternal_llvm-0b457f0c3a7e21b1fb9ac8b9f8e404e1312b6a60.zip
external_llvm-0b457f0c3a7e21b1fb9ac8b9f8e404e1312b6a60.tar.gz
external_llvm-0b457f0c3a7e21b1fb9ac8b9f8e404e1312b6a60.tar.bz2
With sse3 and when the source is a load or has multiple uses, favors movddup over shuffp*, pshufd, etc. Without sse3 or when the source is from a register, make use of movlhps
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56620 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp77
-rw-r--r--lib/Target/X86/X86ISelLowering.h4
-rw-r--r--lib/Target/X86/X86InstrSSE.td33
-rw-r--r--test/CodeGen/X86/vec_shuffle-22.ll16
4 files changed, 113 insertions, 17 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 55238eb..d0b726f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2516,6 +2516,21 @@ bool X86::isSplatLoMask(SDNode *N) {
return true;
}
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned e = N->getNumOperands() / 2;
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
+ return false;
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(e+i), i))
+ return false;
+ return true;
+}
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
@@ -2683,15 +2698,14 @@ static bool ShouldXformToMOVHLPS(SDNode *Mask) {
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
- if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
- N = N->getOperand(0).getNode();
- if (ISD::isNON_EXTLoad(N)) {
- if (LD)
- *LD = cast<LoadSDNode>(N);
- return true;
- }
- }
- return false;
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
}
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
@@ -2943,6 +2957,46 @@ static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
+/// isVectorLoad - Returns true if the node is a vector load, a scalar
+/// load that's promoted to vector, or a load bitcasted.
+static bool isVectorLoad(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Expected a vector type");
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ Op.getOpcode() == ISD::BIT_CONVERT) {
+ return isa<LoadSDNode>(Op.getOperand(0));
+ }
+ return isa<LoadSDNode>(Op);
+}
+
+
+/// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64.
+///
+static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
+ SelectionDAG &DAG, bool HasSSE3) {
+ // If we have sse3 and shuffle has more than one use or input is a load, then
+ // use movddup. Otherwise, use movlhps.
+ bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
+ MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
+ MVT VT = Op.getValueType();
+ if (VT == PVT)
+ return Op;
+ unsigned NumElems = PVT.getVectorNumElements();
+ if (NumElems == 2) {
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ } else {
+ assert(NumElems == 4);
+ SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
+ SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
+ DAG.getNode(ISD::UNDEF, PVT), Mask);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
+}
+
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
@@ -3894,6 +3948,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
}
+ // Canonicalize movddup shuffles.
+ if (V2IsUndef && Subtarget->hasSSE2() &&
+ X86::isMOVDDUPMask(PermMask.getNode()))
+ return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
+
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index a2d8a11..784069b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,10 @@ namespace llvm {
/// specifies a splat of zero element.
bool isSplatLoMask(SDNode *N);
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(SDNode *N);
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d67beac..f9de8e7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -208,6 +208,10 @@ def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
return X86::isSplatLoMask(N);
}]>;
+def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVDDUPMask(N);
+}]>;
+
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHLPSMask(N);
}]>;
@@ -755,6 +759,11 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:
} // AddedComplexity
} // Constraints = "$src1 = $dst"
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
+ (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+
+
// Arithmetic
@@ -2452,16 +2461,24 @@ def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (vector_shuffle
- VR128:$src, (undef),
- SSE_splat_lo_mask)))]>;
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle VR128:$src, (undef),
+ MOVDDUP_shuffle_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle
- (scalar_to_vector (loadf64 addr:$src)),
- (undef),
- SSE_splat_lo_mask)))]>;
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ (scalar_to_vector (loadf64 addr:$src)),
+ (undef), MOVDDUP_shuffle_mask)))]>;
+
+def : Pat<(vector_shuffle
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef), MOVDDUP_shuffle_mask),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(vector_shuffle
+ (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
// Arithmetic
let Constraints = "$src1 = $dst" in {
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
new file mode 100644
index 0000000..5648356
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shuf
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse3 | grep movlhps | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup | count 1
+
+define <4 x float> @t1(<4 x float> %a) nounwind {
+entry:
+ %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp1
+}
+
+define <4 x i32> @t2(<4 x i32>* %a) nounwind {
+entry:
+ %tmp1 = load <4 x i32>* %a;
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp2
+}