aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp5
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp20
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h5
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp86
-rw-r--r--lib/Target/X86/X86ISelLowering.h1
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td2
-rw-r--r--lib/Target/X86/X86InstrSSE.td13
7 files changed, 132 insertions, 0 deletions
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index cb4996b..12f7174 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -225,6 +225,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VPERM2F128rr:
+ DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
}
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index fe0b3a2..aeb3309 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -220,4 +220,24 @@ void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
}
}
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+ unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+ for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+ // has information about the instruction and not the types. So for
+ // instruction comments purpose, assume the 256-bit vector is v4i64.
+ return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
+
} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 1b11c6e..58193e6 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -97,6 +97,11 @@ void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 10ab707..4c83a72 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2753,6 +2753,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMILPSY:
case X86ISD::VPERMILPD:
case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
return true;
}
return false;
@@ -2795,6 +2796,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
+ case X86ISD::VPERM2F128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -3033,6 +3035,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return false;
}
+/// isUndefOrInRange - Return true if every element in Mask, begining from
+/// position Pos and ending in Pos+Size, falls within the specified sequential
+/// range (L, L+Pos]. or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (!isUndefOrEqual(Mask[i], Low))
+ return false;
+ return true;
+}
+
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
@@ -3444,6 +3457,67 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: this
+/// shuffle bellow:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ int HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int HalfSize = VT.getVectorNumElements()/2;
+
+ int FstHalf = 0, SndHalf = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (int i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
+
/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending whether theunderlying
@@ -4317,6 +4391,11 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
+ case X86ISD::VPERM2F128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
default:
assert("not implemented for target shuffle node");
return SDValue();
@@ -6335,6 +6414,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
getShuffleVPERMILPDImmediate(SVOp), DAG);
+ // Handle VPERM2F128 permutations
+ if (isVPERM2F128Mask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
+ getShuffleVPERM2F128Immediate(SVOp), DAG);
+
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -10052,6 +10136,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
+ case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -13134,6 +13219,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMILPSY:
case X86ISD::VPERMILPD:
case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 76267c1..0aaef2a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -275,6 +275,7 @@ namespace llvm {
VPERMILPSY,
VPERMILPD,
VPERMILPDY,
+ VPERM2F128,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 670bbc5..2ccf4c4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -158,6 +158,8 @@ def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a8d6a36..2f8b2a7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5695,6 +5695,19 @@ def : Pat<(int_x86_avx_vperm2f128_si_256
VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//