diff options
author | Evan Cheng <evan.cheng@apple.com> | 2007-12-07 08:07:39 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2007-12-07 08:07:39 +0000 |
commit | 8a86c3f3aee55f68cc227362031596edd93f7170 (patch) | |
tree | cd1d3f5439f13bcb1df3e88112e5332b92b14d01 | |
parent | 050fe638a5e543674133af6abceb1f0967b84134 (diff) | |
download | external_llvm-8a86c3f3aee55f68cc227362031596edd93f7170.zip external_llvm-8a86c3f3aee55f68cc227362031596edd93f7170.tar.gz external_llvm-8a86c3f3aee55f68cc227362031596edd93f7170.tar.bz2 |
Much improved v8i16 shuffles. (Step 1).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44676 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 151 | ||||
-rw-r--r-- | test/CodeGen/X86/2007-12-05-VectorShuffle.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_shuffle-12.ll | 37 |
3 files changed, 163 insertions, 34 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 15286cc..2653481 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2754,10 +2754,33 @@ static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, } std::swap(V1, V2); - Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); + Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); } +static +SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) { + MVT::ValueType MaskVT = Mask.getValueType(); + MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); + unsigned NumElems = Mask.getNumOperands(); + SmallVector<SDOperand, 8> MaskVec; + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); + continue; + } + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); +} + + /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. 
The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper @@ -3282,6 +3305,102 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { return SDOperand(); } +static +SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2, + SDOperand PermMask, SelectionDAG &DAG, + TargetLowering &TLI) { + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); + if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) { + // Handle v8i16 shuffle high / low shuffle node pair. + SmallVector<SDOperand, 8> MaskVec; + for (unsigned i = 0; i != 4; ++i) + MaskVec.push_back(PermMask.getOperand(i)); + for (unsigned i = 4; i != 8; ++i) + MaskVec.push_back(DAG.getConstant(i, MaskEVT)); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask); + MaskVec.clear(); + for (unsigned i = 0; i != 4; ++i) + MaskVec.push_back(DAG.getConstant(i, MaskEVT)); + for (unsigned i = 4; i != 8; ++i) + MaskVec.push_back(PermMask.getOperand(i)); + Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask); + } + + // Lower them into extracts and inserts but try to do as few as possible. + // First, let's find out how many elements are already in the right order. 
+ unsigned V1InOrder = 0; + unsigned V1FromV1 = 0; + unsigned V2InOrder = 0; + unsigned V2FromV2 = 0; + SmallVector<unsigned, 8> V1Elts; + SmallVector<unsigned, 8> V2Elts; + for (unsigned i = 0; i < 8; ++i) { + SDOperand Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { + V1Elts.push_back(i); + V2Elts.push_back(i); + ++V1InOrder; + ++V2InOrder; + } else { + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx == i) { + V1Elts.push_back(i); + V2Elts.push_back(i+8); + ++V1InOrder; + } else if (EltIdx == i+8) { + V1Elts.push_back(i+8); + V2Elts.push_back(i); + ++V2InOrder; + } else { + V1Elts.push_back(EltIdx); + V2Elts.push_back(EltIdx); + if (EltIdx < 8) + ++V1FromV1; + else + ++V2FromV2; + } + } + } + + if (V2InOrder > V1InOrder) { + PermMask = CommuteVectorShuffleMask(PermMask, DAG); + std::swap(V1, V2); + std::swap(V1Elts, V2Elts); + std::swap(V1FromV1, V2FromV2); + } + + MVT::ValueType PtrVT = TLI.getPointerTy(); + if (V1FromV1) { + // If there are elements that are from V1 but out of place, + // then first sort them in place + SmallVector<SDOperand, 8> MaskVec; + for (unsigned i = 0; i < 8; ++i) { + unsigned EltIdx = V1Elts[i]; + if (EltIdx >= 8) + MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); + else + MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); + } + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); + } + + // Now let's insert elements from the other vector. 
+ for (unsigned i = 0; i < 8; ++i) { + unsigned EltIdx = V1Elts[i]; + if (EltIdx < 8) + continue; + SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, + DAG.getConstant(EltIdx - 8, PtrVT)); + V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp, + DAG.getConstant(i, PtrVT)); + } + return V1; +} + SDOperand X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { SDOperand V1 = Op.getOperand(0); @@ -3406,27 +3525,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { if (X86::isSHUFPMask(PermMask.Val) && MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. return Op; - - // Handle v8i16 shuffle high / low shuffle node pair. - if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { - MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); - SmallVector<SDOperand, 8> MaskVec; - for (unsigned i = 0; i != 4; ++i) - MaskVec.push_back(PermMask.getOperand(i)); - for (unsigned i = 4; i != 8; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, - &MaskVec[0], MaskVec.size()); - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); - MaskVec.clear(); - for (unsigned i = 0; i != 4; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - for (unsigned i = 4; i != 8; ++i) - MaskVec.push_back(PermMask.getOperand(i)); - Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); - } } else { // Floating point cases in the other order. if (X86::isSHUFPMask(PermMask.Val)) @@ -3441,9 +3539,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } } - if (NumElems == 4 && - // Don't do this for MMX. - MVT::getSizeInBits(VT) != 64) { + // Handle v8i16 specifically since SSE can do byte extraction and insertion. 
+ if (VT == MVT::v8i16) + return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this); + + if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) { + // Don't do this for MMX. MVT::ValueType MaskVT = PermMask.getValueType(); MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector<std::pair<int, int>, 8> Locs; diff --git a/test/CodeGen/X86/2007-12-05-VectorShuffle.ll b/test/CodeGen/X86/2007-12-05-VectorShuffle.ll deleted file mode 100644 index 8933a63..0000000 --- a/test/CodeGen/X86/2007-12-05-VectorShuffle.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 - -define void @test(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > - store <8 x i16> %tmp3, <8 x i16>* %res - ret void -} diff --git a/test/CodeGen/X86/vec_shuffle-12.ll b/test/CodeGen/X86/vec_shuffle-12.ll new file mode 100644 index 0000000..f66f3bb --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-12.ll @@ -0,0 +1,37 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2 + +define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + store <8 x i16> %tmp3, <8 x i16>* %res + ret void +} + +define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, 
i32 4, i32 5, i32 6, i32 7 > + store <8 x i16> %tmp3, <8 x i16>* %res + ret void +} + +define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 > + store <8 x i16> %tmp3, <8 x i16>* %res + ret void +} + +define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + store <8 x i16> %tmp3, <8 x i16>* %res + ret void +} |