author     Evan Cheng <evan.cheng@apple.com>    2007-12-07 08:07:39 +0000
committer  Evan Cheng <evan.cheng@apple.com>    2007-12-07 08:07:39 +0000
commit     8a86c3f3aee55f68cc227362031596edd93f7170 (patch)
tree       cd1d3f5439f13bcb1df3e88112e5332b92b14d01
parent     050fe638a5e543674133af6abceb1f0967b84134 (diff)
Much improved v8i16 shuffles. (Step 1).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44676 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp             151
-rw-r--r--  test/CodeGen/X86/2007-12-05-VectorShuffle.ll     9
-rw-r--r--  test/CodeGen/X86/vec_shuffle-12.ll              37
3 files changed, 163 insertions, 34 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 15286cc..2653481 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2754,10 +2754,33 @@ static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
}
std::swap(V1, V2);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+static
+SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = Mask.getValueType();
+ MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
+ else
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
+}
+
+
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
@@ -3282,6 +3305,102 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
return SDOperand();
}
+static
+SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
+ // Handle it with a shuffle high / shuffle low (pshufhw / pshuflw) node pair.
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(PermMask.getOperand(i));
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(PermMask.getOperand(i));
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
+ }
+
+ // Lower this into extracts and inserts but try to do as few as possible.
+ // First, let's find out how many elements are already in the right order.
+ unsigned V1InOrder = 0;
+ unsigned V1FromV1 = 0;
+ unsigned V2InOrder = 0;
+ unsigned V2FromV2 = 0;
+ SmallVector<unsigned, 8> V1Elts;
+ SmallVector<unsigned, 8> V2Elts;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = PermMask.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ V1Elts.push_back(i);
+ V2Elts.push_back(i);
+ ++V1InOrder;
+ ++V2InOrder;
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i) {
+ V1Elts.push_back(i);
+ V2Elts.push_back(i+8);
+ ++V1InOrder;
+ } else if (EltIdx == i+8) {
+ V1Elts.push_back(i+8);
+ V2Elts.push_back(i);
+ ++V2InOrder;
+ } else {
+ V1Elts.push_back(EltIdx);
+ V2Elts.push_back(EltIdx);
+ if (EltIdx < 8)
+ ++V1FromV1;
+ else
+ ++V2FromV2;
+ }
+ }
+ }
+
+ if (V2InOrder > V1InOrder) {
+ PermMask = CommuteVectorShuffleMask(PermMask, DAG);
+ std::swap(V1, V2);
+ std::swap(V1Elts, V2Elts);
+ std::swap(V1FromV1, V2FromV2);
+ }
+
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ if (V1FromV1) {
+ // If some elements come from V1 but are out of place,
+ // shuffle them into place first.
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < 8; ++i) {
+ unsigned EltIdx = V1Elts[i];
+ if (EltIdx >= 8)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ else
+ MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ }
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ }
+
+ // Now let's insert elements from the other vector.
+ for (unsigned i = 0; i < 8; ++i) {
+ unsigned EltIdx = V1Elts[i];
+ if (EltIdx < 8)
+ continue;
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return V1;
+}
+
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
@@ -3406,27 +3525,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
-
- // Handle v8i16 shuffle high / low shuffle node pair.
- if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size());
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- }
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
@@ -3441,9 +3539,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
}
}
- if (NumElems == 4 &&
- // Don't do this for MMX.
- MVT::getSizeInBits(VT) != 64) {
+ // Handle v8i16 specifically since SSE can do word extraction and insertion (pextrw / pinsrw).
+ if (VT == MVT::v8i16)
+ return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+
+ if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ // Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
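
When the mask is not a pshufhw / pshuflw pair, the new LowerVECTOR_SHUFFLEv8i16 above falls back to extract/insert, and first counts how many lanes each operand already has in place so it can commute toward the cheaper side before doing a single self-shuffle of V1 and per-lane inserts from V2. Below is a minimal standalone sketch of that counting heuristic; the names (ShufflePlan, planV8I16Shuffle) and the plain-array mask representation are illustrative assumptions, not the SelectionDAG API used in the patch.

// Hypothetical standalone sketch of the counting heuristic used by the new
// LowerVECTOR_SHUFFLEv8i16; not the SelectionDAG code from the patch.
// Mask entries are 0-15 (0-7 select from V1, 8-15 from V2) or -1 for undef.
#include <array>
#include <cstdio>
#include <utility>

struct ShufflePlan {
  bool Commute;        // swap V1/V2 (and flip the mask) if V2 has more lanes in place
  bool NeedV1Shuffle;  // some V1 elements are present but in the wrong lane
  int  NumV2Inserts;   // lanes that must be pulled from V2 with extract/insert
};

ShufflePlan planV8I16Shuffle(std::array<int, 8> Mask) {
  unsigned V1InOrder = 0, V2InOrder = 0, V1FromV1 = 0, V2FromV2 = 0;
  for (int i = 0; i != 8; ++i) {
    int M = Mask[i];
    if (M < 0) {               // undef lane counts as "in order" for both operands
      ++V1InOrder; ++V2InOrder;
    } else if (M == i) {       // already in place in V1
      ++V1InOrder;
    } else if (M == i + 8) {   // already in place in V2
      ++V2InOrder;
    } else if (M < 8) {        // comes from V1 but from the wrong lane
      ++V1FromV1;
    } else {                   // comes from V2 but from the wrong lane
      ++V2FromV2;
    }
  }

  ShufflePlan P;
  P.Commute = V2InOrder > V1InOrder;   // prefer the operand with more lanes in place
  if (P.Commute) {
    std::swap(V1InOrder, V2InOrder);
    std::swap(V1FromV1, V2FromV2);
  }
  P.NeedV1Shuffle = V1FromV1 != 0;     // one self-shuffle of V1 sorts those lanes
  P.NumV2Inserts  = 8 - V1InOrder - V1FromV1; // remaining lanes need extract/insert pairs
  return P;
}

int main() {
  // Mask from test t2 below: lanes 0 and 3 come from %B, the rest from %A.
  ShufflePlan P = planV8I16Shuffle({8, 1, 2, 13, 4, 5, 6, 7});
  std::printf("commute=%d shuffleV1=%d inserts=%d\n",
              P.Commute, P.NeedV1Shuffle, P.NumV2Inserts);
}

For the t2 mask <8,1,2,13,4,5,6,7> the sketch reports no commute, no V1 self-shuffle, and two V2 inserts, which matches the pextrw / pinsrw pairs the new lowering emits for that case (and the instruction counts the RUN lines below grep for).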
diff --git a/test/CodeGen/X86/2007-12-05-VectorShuffle.ll b/test/CodeGen/X86/2007-12-05-VectorShuffle.ll
deleted file mode 100644
index 8933a63..0000000
--- a/test/CodeGen/X86/2007-12-05-VectorShuffle.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-
-define void @test(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-12.ll b/test/CodeGen/X86/vec_shuffle-12.ll
new file mode 100644
index 0000000..f66f3bb
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-12.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2
+
+define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ store <8 x i16> %tmp3, <8 x i16>* %res
+ ret void
+}
+
+define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 >
+ store <8 x i16> %tmp3, <8 x i16>* %res
+ ret void
+}
+
+define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+ store <8 x i16> %tmp3, <8 x i16>* %res
+ ret void
+}
+
+define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ store <8 x i16> %tmp3, <8 x i16>* %res
+ ret void
+}