diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/InstPrinter/X86InstComments.cpp | 5 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 86 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 13 |
7 files changed, 132 insertions, 0 deletions
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index cb4996b..12f7174 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -225,6 +225,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERM2F128rr: + DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index fe0b3a2..aeb3309 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -220,4 +220,24 @@ void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, } } +void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned HalfSize = VT.getVectorNumElements()/2; + unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; + unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; + + for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) + ShuffleMask.push_back(i); + for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) + ShuffleMask.push_back(i); +} + +void DecodeVPERM2F128Mask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only + // has information about the instruction and not the types. So for + // instruction comments purpose, assume the 256-bit vector is v4i64. + return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask); +} + } // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 1b11c6e..58193e6 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -97,6 +97,11 @@ void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm, void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeVPERM2F128Mask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + } // llvm namespace #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 10ab707..4c83a72 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2753,6 +2753,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: return true; } return false; @@ -2795,6 +2796,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: + case X86ISD::VPERM2F128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -3033,6 +3035,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) { return false; } +/// isUndefOrInRange - Return true if every element in Mask, begining from +/// position Pos and ending in Pos+Size, falls within the specified sequential +/// range (L, L+Pos]. or is undef. +static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low) { + for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (!isUndefOrEqual(Mask[i], Low)) + return false; + return true; +} + /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. @@ -3444,6 +3457,67 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) { return ::isMOVLMask(M, N->getValueType(0)); } +/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered +/// as permutations between 128-bit chunks or halves. As an example: this +/// shuffle bellow: +/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> +/// The first half comes from the second half of V1 and the second half from the +/// the second half of V2. +static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) + return false; + + // The shuffle result is divided into half A and half B. In total the two + // sources have 4 halves, namely: C, D, E, F. The final values of A and + // B must come from C, D, E or F. + int HalfSize = VT.getVectorNumElements()/2; + bool MatchA = false, MatchB = false; + + // Check if A comes from one of C, D, E, F. + for (int Half = 0; Half < 4; ++Half) { + if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) { + MatchA = true; + break; + } + } + + // Check if B comes from one of C, D, E, F. + for (int Half = 0; Half < 4; ++Half) { + if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) { + MatchB = true; + break; + } + } + + return MatchA && MatchB; +} + +/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERM2F128 instructions. +static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int HalfSize = VT.getVectorNumElements()/2; + + int FstHalf = 0, SndHalf = 0; + for (int i = 0; i < HalfSize; ++i) { + if (SVOp->getMaskElt(i) > 0) { + FstHalf = SVOp->getMaskElt(i)/HalfSize; + break; + } + } + for (int i = HalfSize; i < HalfSize*2; ++i) { + if (SVOp->getMaskElt(i) > 0) { + SndHalf = SVOp->getMaskElt(i)/HalfSize; + break; + } + } + + return (FstHalf | (SndHalf << 4)); +} + /// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to VPERMILPD*. /// Note that VPERMIL mask matching is different depending whether theunderlying @@ -4317,6 +4391,11 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; + case X86ISD::VPERM2F128: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; default: assert("not implemented for target shuffle node"); return SDValue(); @@ -6335,6 +6414,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, getShuffleVPERMILPDImmediate(SVOp), DAG); + // Handle VPERM2F128 permutations + if (isVPERM2F128Mask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, + getShuffleVPERM2F128Immediate(SVOp), DAG); + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -10052,6 +10136,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; + case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -13134,6 +13219,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 76267c1..0aaef2a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -275,6 +275,7 @@ namespace llvm { VPERMILPSY, VPERMILPD, VPERMILPDY, + VPERM2F128, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 670bbc5..2ccf4c4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -158,6 +158,8 @@ def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>; +def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a8d6a36..2f8b2a7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5695,6 +5695,19 @@ def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers // |