diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 61 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-vshufp.ll | 14 |
2 files changed, 60 insertions, 15 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a03f44..7e35fd2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3431,6 +3431,41 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
   return Mask;
 }
 
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
+  unsigned NumElems = VT.getVectorNumElements();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int idx = Mask[i];
+    if (idx < 0)
+      continue;
+    else if (idx < (int)NumElems)
+      Mask[i] = idx + NumElems;
+    else
+      Mask[i] = idx - NumElems;
+  }
+}
+
+/// isCommutedVSHUFPMask - Return true if swapping the two operands allows the
+/// shuffle to be matched by the "vshufpd" or "vshufps" instruction
+/// for 256-bit vectors.
+static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                                 const X86Subtarget *Subtarget) {
+
+  unsigned NumElems = VT.getVectorNumElements();
+  if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8)))
+    return false;
+
+  SmallVector<int, 8> CommutedMask;
+  for (unsigned i = 0; i < NumElems; ++i)
+    CommutedMask.push_back(Mask[i]);
+
+  CommuteVectorShuffleMask(CommutedMask, VT);
+  return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
+                           isVSHUFPSYMask(CommutedMask, VT, Subtarget);
+}
+
+
 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 128-bit
 /// SHUFPS and SHUFPD.
@@ -4233,21 +4268,6 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
                               SVOp->getOperand(0), &MaskVec[0]);
 }
 
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
-  unsigned NumElems = VT.getVectorNumElements();
-  for (unsigned i = 0; i != NumElems; ++i) {
-    int idx = Mask[i];
-    if (idx < 0)
-      continue;
-    else if (idx < (int)NumElems)
-      Mask[i] = idx + NumElems;
-    else
-      Mask[i] = idx - NumElems;
-  }
-}
-
 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
 /// match movhlps. The lower half elements should come from upper half of
 /// V1 (and in order), and the upper half elements should come from the upper
@@ -6986,6 +7006,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
                                 getShuffleVSHUFPDYImmediate(SVOp), DAG);
 
+  // Try to swap operands in the node to match x86 shuffle ops
+  if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
+    // Now we need to commute operands.
+    SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
+    V1 = SVOp->getOperand(0);
+    V2 = SVOp->getOperand(1);
+    unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp):
+                                           getShuffleVSHUFPSYImmediate(SVOp);
+    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
+  }
+
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
   // lower it into other known shuffles. FIXME: this isn't true yet, but
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index f06548d..7ec3a44 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -27,3 +27,17 @@ entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
   ret <4 x double> %shuffle
 }
+
+; CHECK: vshufps $-55, %ymm
+define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vshufpd $8, %ymm
+define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
 |