diff options
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 1651 |
1 files changed, 661 insertions, 990 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d51435c..0236602 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,8 @@ static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -1667,9 +1668,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getUNDEF(MVT::v2i64), Arg, - getMOVLMask(2, DAG, dl)); + Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); break; } } @@ -2138,186 +2137,156 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value falls within the specified range (L, H]. -static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { - if (Op.getOpcode() == ISD::UNDEF) - return true; - - unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue(); - return (Val >= Low && Val < Hi); +/// isUndefOrInRange - Return true if Val is undef or if its value falls within +/// the specified range (L, H]. +static bool isUndefOrInRange(int Val, int Low, int Hi) { + return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value equal to the specified value. -static bool isUndefOrEqual(SDValue Op, unsigned Val) { - if (Op.getOpcode() == ISD::UNDEF) +/// isUndefOrEqual - Val is either less than zero (undef) or equal to the +/// specified value. +static bool isUndefOrEqual(int Val, int CmpVal) { + if (Val < 0 || Val == CmpVal) return true; - return cast<ConstantSDNode>(Op)->getZExtValue() == Val; + return false; } -/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFD. -bool X86::isPSHUFDMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 2 && N->getNumOperands() != 4) - return false; - - // Check if the value doesn't reference the second vector. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e) - return false; - } - - return true; +/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference +/// the second operand. +static bool isPSHUFDMask(const int *Mask, MVT VT) { + if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return (Mask[0] < 2 && Mask[1] < 2); + return false; } -/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFHW. -bool X86::isPSHUFHWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { + return ::isPSHUFDMask(N->getMask(), N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFHW. +static bool isPSHUFHWMask(const int *Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - - // Lower quadword copied in order. - for (unsigned i = 0; i != 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(Arg)->getZExtValue() != i) + + // Lower quadword copied in order or undef. + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - } - + // Upper quadword shuffled. - for (unsigned i = 4; i != 8; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < 4 || Val > 7) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; - } - + return true; } -/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFLW. -bool X86::isPSHUFLWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { + return ::isPSHUFHWMask(N->getMask(), N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFLW. +static bool isPSHUFLWMask(const int *Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - + // Upper quadword copied in order. - for (unsigned i = 4; i != 8; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Lower quadword shuffled. - for (unsigned i = 0; i != 4; ++i) - if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 4) return false; - + return true; } +bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { + return ::isPSHUFLWMask(N->getMask(), N->getValueType(0)); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -template<class SDOperand> -static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { - if (NumElems != 2 && NumElems != 4) return false; - - unsigned Half = NumElems / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Elems[i], 0, NumElems)) +static bool isSHUFPMask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; - for (unsigned i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); +bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { + return ::isSHUFPMask(N->getMask(), N->getValueType(0)); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. -template<class SDOperand> -static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { - if (NumOps != 2 && NumOps != 4) return false; - - unsigned Half = NumOps / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) +static bool isCommutedSHUFPMask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - for (unsigned i = Half; i < NumOps; ++i) - if (!isUndefOrInRange(Ops[i], 0, NumOps)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; return true; } -static bool isCommutedSHUFP(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); +static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { + return isCommutedSHUFPMask(N->getMask(), N->getValueType(0)); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 6) && - isUndefOrEqual(N->getOperand(1), 7) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); -} - -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 2) && - isUndefOrEqual(N->getOperand(1), 3) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); + const int *Mask = N->getMask(); + return isUndefOrEqual(Mask[0], 6) && + isUndefOrEqual(Mask[1], 7) && + isUndefOrEqual(Mask[2], 2) && + isUndefOrEqual(Mask[3], 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; + const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) + if (!isUndefOrEqual(Mask[i], i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(Mask[i], i)) return false; return true; @@ -2326,37 +2295,49 @@ bool X86::isMOVLPMask(SDNode *N) { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. -bool X86::isMOVHPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; + const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) { - SDValue Arg = N->getOperand(i + NumElems/2); - if (!isUndefOrEqual(Arg, i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) return false; - } return true; } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 + const int *Mask = N->getMask(); + return isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3) && + isUndefOrEqual(Mask[2], 2) && isUndefOrEqual(Mask[3], 3); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. -template<class SDOperand> -bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, - bool V2IsSplat = false) { +static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2367,26 +2348,23 @@ bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + return ::isUNPCKLMask(N->getMask(), N->getValueType(0), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. -template<class SDOperand> -bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, - bool V2IsSplat = false) { +static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2397,270 +2375,166 @@ bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + return ::isUNPCKHMask(N->getMask(), N->getValueType(0), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKL_v_undef_Mask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i+1); - + + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { + return ::isUNPCKL_v_undef_Mask(N->getMask(), N->getValueType(0)); +} + /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> -bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKH_v_undef_Mask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i + 1); - + + for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { + return ::isUNPCKH_v_undef_Mask(N->getMask(), N->getValueType(0)); +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -template<class SDOperand> -static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { +static bool isMOVLMask(const int *Mask, MVT VT) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4) return false; - - if (!isUndefOrEqual(Elts[0], NumElts)) + + if (!isUndefOrEqual(Mask[0], NumElts)) return false; - - for (unsigned i = 1; i < NumElts; ++i) { - if (!isUndefOrEqual(Elts[i], i)) + + for (int i = 1; i < NumElts; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - } - + return true; } -bool X86::isMOVLMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isMOVLMask(N->op_begin(), N->getNumOperands()); +bool X86::isMOVLMask(ShuffleVectorSDNode *N) { + return ::isMOVLMask(N->getMask(), N->getValueType(0)); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. -template<class SDOperand> -static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, - bool V2IsSplat = false, - bool V2IsUndef = false) { +static bool isCommutedMOVLMask(const int *Mask, MVT VT, bool V2IsSplat = false, + bool V2IsUndef = false) { + int NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Ops[0], 0)) + + if (!isUndefOrEqual(Mask[0], 0)) return false; - - for (unsigned i = 1; i < NumOps; ++i) { - SDValue Arg = Ops[i]; - if (!(isUndefOrEqual(Arg, i+NumOps) || - (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) + + for (int i = 1; i < NumOps; ++i) + if (!(isUndefOrEqual(Mask[i], i+NumOps) || + (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) return false; - } - + return true; } -static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedMOVL(N->op_begin(), N->getNumOperands(), - V2IsSplat, V2IsUndef); + return isCommutedMOVLMask(N->getMask(), N->getValueType(0), V2IsSplat, + V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 1, 1, 3, 3 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 1) return false; - } + const int *Mask = N->getMask(); + for (unsigned i = 0; i < 2; ++i) + if (Mask[i] >=0 && Mask[i] != 1) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 3) return false; - HasHi = true; + if (Mask[i] >= 0 && Mask[i] != 3) + return false; + if (Mask[i] == 3) + HasHi = true; } - // Don't use movshdup if it can be done with a shufps. + // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. -bool X86::isMOVSLDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 0) return false; - } + const int *Mask = N->getMask(); + for (unsigned i = 0; i < 2; ++i) + if (Mask[i] > 0) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 2) return false; - HasHi = true; - } - - // Don't use movshdup if it can be done with a shufps. - return HasHi; -} - -/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a identity operation on the LHS or RHS. -static bool isIdentityMask(SDNode *N, bool RHS = false) { - unsigned NumElems = N->getNumOperands(); - for (unsigned i = 0; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + if (Mask[i] >= 0 && Mask[i] != 2) return false; - return true; -} - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element. -static bool isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // This is a splat operation if each element of the permute is the same, and - // if the value doesn't reference the second vector. - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa<ConstantSDNode>(Elt)) { - ElementBase = Elt; - break; - } + if (Mask[i] == 2) + HasHi = true; } - - if (!ElementBase.getNode()) - return false; - - for (; i != NumElems; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (Arg != ElementBase) return false; - } - - // Make sure it is a splat of the first vector operand. - return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems; -} - -/// getSplatMaskEltNo - Given a splat mask, return the index to the element -/// we want to splat. -static SDValue getSplatMaskEltNo(SDNode *N) { - assert(isSplatMask(N) && "Not a splat mask"); - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa<ConstantSDNode>(Elt)) - return Elt; - } - assert(0 && " No splat value found!"); - return SDValue(); -} - - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element and it's a 2 or 4 element mask. -bool X86::isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // We can only splat 64-bit, and 32-bit quantities with a single instruction. - if (N->getNumOperands() != 4 && N->getNumOperands() != 2) - return false; - return ::isSplatMask(N); -} - -/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a splat of zero element. -bool X86::isSplatLoMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), 0)) - return false; - return true; + // Don't use movsldup if it can be done with a shufps. + return HasHi; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned e = N->getNumOperands() / 2; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) +bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { + int e = N->getValueType(0).getVectorNumElements() / 2; + const int *Mask = N->getMask(); + + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(e+i), i)) + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(Mask[e+i], i)) return false; return true; } @@ -2669,20 +2543,20 @@ bool X86::isMOVDDUPMask(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - unsigned NumOperands = N->getNumOperands(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + int NumOperands = SVOp->getValueType(0).getVectorNumElements(); + const int *MaskP = SVOp->getMask(); + unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (unsigned i = 0; i < NumOperands; ++i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(NumOperands-i-1); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + for (int i = 0; i < NumOperands; ++i) { + int Val = MaskP[NumOperands-i-1]; + if (Val < 0) Val = 0; if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } - return Mask; } @@ -2690,19 +2564,16 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + int Val = MaskP[i]; + if (Val >= 0) Mask |= (Val - 4); - } if (i != 4) Mask <<= 2; } - return Mask; } @@ -2710,90 +2581,71 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - Mask |= Val; + int Val = MaskP[i]; + if (Val >= 0) + Mask |= Val; if (i != 0) Mask <<= 2; } - return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as -/// values in ther permute mask. -static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, - SDValue &V2, SDValue &Mask, - SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector<SDValue, 8> MaskVec; - DebugLoc dl = Op.getDebugLoc(); - - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); - continue; - } - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in +/// their permute mask. +static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + const int *Mask = SVOp->getMask(); + SmallVector<int, 8> MaskVec; + + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) + MaskVec.push_back(idx); + else if (idx < NumElems) + MaskVec.push_back(idx + NumElems); else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + MaskVec.push_back(idx - NumElems); } - - std::swap(V1, V2); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), + SVOp->getOperand(0), &MaskVec[0]); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static -SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector<SDValue, 8> MaskVec; - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); +static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) continue; - } - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else if (idx < NumElems) + Mask[i] = idx + NumElems; else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + Mask[i] = idx - NumElems; } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). -static bool ShouldXformToMOVHLPS(SDNode *Mask) { - unsigned NumElems = Mask->getNumOperands(); +static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { + int NumElems = Op->getValueType(0).getVectorNumElements(); + const int *Mask = Op->getMask(); + if (NumElems != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+2)) + if (!isUndefOrEqual(Mask[i], i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+4)) + if (!isUndefOrEqual(Mask[i], i+4)) return false; return true; } @@ -2817,7 +2669,8 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, + ShuffleVectorSDNode *Op) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -2825,14 +2678,16 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Mask->getNumOperands(); + int NumElems = Op->getValueType(0).getVectorNumElements(); + const int *Mask = Op->getMask(); + if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i)) + for (int i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) + for (int i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask[i], i+NumElems)) return false; return true; } @@ -2850,29 +2705,6 @@ static bool isSplatVector(SDNode *N) { return true; } -/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an undef. -static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - - SDValue V1 = N->getOperand(0); - SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) - return false; - else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) - return false; - } - } - return true; -} - /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2883,34 +2715,26 @@ static inline bool isZeroNode(SDValue Elt) { } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -static bool isZeroShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - +/// to an zero vector. +/// FIXME: move to dag combiner? +static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) - continue; - - unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Idx < NumElems) { - unsigned Opc = V1.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + const int *Mask = N->getMask(); + int NumElems = N->getValueType(0).getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int Idx = Mask[i]; + if (Idx >= NumElems) { + unsigned Opc = V2.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V1.getNode()->getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) return false; - } else if (Idx >= NumElems) { - unsigned Opc = V2.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + } else if (Idx >= 0) { + unsigned Opc = V1.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2958,127 +2782,94 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. -static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { - assert(Mask.getOpcode() == ISD::BUILD_VECTOR); - +static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + const int *Mask = SVOp->getMask(); + bool Changed = false; - SmallVector<SDValue, 8> MaskVec; - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val > NumElems) { - Arg = DAG.getConstant(NumElems, Arg.getValueType()); - Changed = true; - } + SmallVector<int, 8> MaskVec; + + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx > NumElems) { + idx = NumElems; + Changed = true; } - MaskVec.push_back(Arg); + MaskVec.push_back(idx); } - if (Changed) - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), - Mask.getValueType(), - &MaskVec[0], MaskVec.size()); - return Mask; + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), + SVOp->getOperand(1), &MaskVec[0]); + return SDValue(SVOp, 0); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - - SmallVector<SDValue, 8> MaskVec; - MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 8> Mask; + Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation -/// of specified width. -static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 8> Mask; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + Mask.push_back(i); + Mask.push_back(i + NumElems); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation -/// of specified width. -static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); +/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> Mask; for (unsigned i = 0; i != Half; ++i) { - MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); - } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps -/// element #0 of a vector with the specified index, leaving the rest of the -/// elements in place. -static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, - SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; - // Element #0 of the result gets the elt we are replacing. - MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); - for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + Mask.push_back(i + Half); + Mask.push_back(i + NumElems + Half); + } + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { - MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32; - MVT VT = Op.getValueType(); - if (PVT == VT) - return Op; - SDValue V1 = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); - unsigned MaskNumElems = Mask.getNumOperands(); - unsigned NumElems = MaskNumElems; - DebugLoc dl = Op.getDebugLoc(); - // Special handling of v4f32 -> v4i32. - if (VT != MVT::v4f32) { - // Find which element we want to splat. - SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); - unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue(); - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - Mask = getUnpacklMask(MaskNumElems, DAG, dl); - } else { - Mask = getUnpackhMask(MaskNumElems, DAG, dl); - EltNo -= NumElems/2; - } - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); - NumElems >>= 1; +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE2) { + if (SV->getValueType(0).getVectorNumElements() <= 4) + return SDValue(SV, 0); + + MVT PVT = MVT::v4f32; + MVT VT = SV->getValueType(0); + DebugLoc dl = SV->getDebugLoc(); + SDValue V1 = SV->getOperand(0); + int NumElems = VT.getVectorNumElements(); + int EltNo = SV->getSplatIndex(); + + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + V1 = getUnpackl(DAG, dl, VT, V1, V1); + } else { + V1 = getUnpackh(DAG, dl, VT, V1, V1); + EltNo -= NumElems/2; } - SDValue Cst = DAG.getConstant(EltNo, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + NumElems >>= 1; } - + + // Perform the splat. + int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// isVectorLoad - Returns true if the node is a vector load, a scalar @@ -3095,32 +2886,28 @@ static bool isVectorLoad(SDValue Op) { /// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. /// -static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, - SelectionDAG &DAG, bool HasSSE3) { +static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE3) { // If we have sse3 and shuffle has more than one use or input is a load, then // use movddup. Otherwise, use movlhps. - bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); + SDValue V1 = SV->getOperand(0); + + bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1)); MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; - MVT VT = Op.getValueType(); + MVT VT = SV->getValueType(0); if (VT == PVT) - return Op; - DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PVT.getVectorNumElements(); - if (NumElems == 2) { - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + return SDValue(SV, 0); + + DebugLoc dl = SV->getDebugLoc(); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + if (PVT.getVectorNumElements() == 2) { + int Mask[2] = { 0, 0 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } else { - assert(NumElems == 4); - SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); - SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst0, Cst1, Cst0, Cst1); + int Mask[4] = { 0, 1, 0, 1 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } - - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3130,39 +2917,31 @@ static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); - unsigned NumElems = V2.getValueType().getVectorNumElements(); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 16> MaskVec; + ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 16> MaskVec; for (unsigned i = 0; i != NumElems; ++i) - if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(DAG.getConstant(NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(i == Idx ? NumElems : i); + return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, - unsigned NumElems, bool Low, - SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, + int NumElems, bool Low, SelectionDAG &DAG) { unsigned NumZeros = 0; - for (unsigned i = 0; i < NumElems; ++i) { + for (int i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - SDValue Idx = Mask.getOperand(Index); - if (Idx.getOpcode() == ISD::UNDEF) { + int Idx = Mask[Index]; + if (Idx < 0) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); + SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -3173,40 +2952,40 @@ unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. -static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, +/// FIXME: split into pslldqi, psrldqi, palignr variants. +static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = Mask.getNumOperands(); + const int *Mask = SVOp->getMask(); + int NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; - unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, false, DAG); if (!NumZeros) return false; } - bool SeenV1 = false; bool SeenV2 = false; - for (unsigned i = NumZeros; i < NumElems; ++i) { - unsigned Val = isLeft ? (i - NumZeros) : i; - SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); - if (Idx.getOpcode() == ISD::UNDEF) + for (int i = NumZeros; i < NumElems; ++i) { + int Val = isLeft ? (i - NumZeros) : i; + int Idx = Mask[isLeft ? i : (i - NumZeros)]; + if (Idx < 0) continue; - unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue(); - if (Index < NumElems) + if (Idx < NumElems) SeenV1 = true; else { - Index -= NumElems; + Idx -= NumElems; SeenV2 = true; } - if (Index != Val) + if (Idx != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); + ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); ShAmt = NumZeros; return true; } @@ -3291,8 +3070,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; @@ -3377,11 +3156,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SDValue Ops[] = { - Item, DAG.getUNDEF(Item.getValueType()), - getSwapEltZeroMask(VecElts, Idx, DAG, dl) - }; - Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); + SmallVector<int, 4> Mask; + Mask.push_back(Idx); + for (unsigned i = 1; i != VecElts; ++i) + Mask.push_back(i); + Item = DAG.getVectorShuffle(VecVT, dl, Item, + DAG.getUNDEF(Item.getValueType()), + &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3425,15 +3206,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT MaskEVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, - DAG.getUNDEF(VT), Mask); + MaskVec.push_back(i == Idx ? 0 : 1); + return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); } } @@ -3491,54 +3267,48 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { V[i] = V[i*2]; // Must be a zero vector. break; case 1: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); break; case 2: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); break; case 3: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getUnpacklMask(NumElems, DAG, dl)); + V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); break; } } - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); + MaskVec.push_back(Reverse ? 1-i : i); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); + MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); + return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } if (Values.size() > 2) { + // If we have SSE 4.1, Expand into a number of inserts. + if (getSubtarget()->hasSSE41()) { + V[0] = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NumElems; ++i) + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + Op.getOperand(i), DAG.getIntPtrConstant(i)); + return V[0]; + } // Expand into a number of unpckl*. // e.g. for v4f32 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], - UnpckMask); + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); NumElems >>= 1; } return V[0]; @@ -3553,11 +3323,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + const int *Mask = SVOp->getMask(); SmallVector<int, 8> MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3568,9 +3339,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad; - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast<ConstantSDNode>(Elt)->getZExtValue(); + int EltIdx = Mask[i]; MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3623,14 +3392,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // words from all 4 input quadwords. SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector<SDValue,8> MaskV; - MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); - MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); - - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); + SmallVector<int, 8> MaskV; + MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); + MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3668,15 +3435,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - MaskV.clear(); - for (unsigned i = 0; i != 8; ++i) - MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) - : DAG.getConstant(MaskVals[i], - MVT::i16)); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, - &MaskV[0], 8)); + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } @@ -3733,49 +3493,45 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // and update MaskVals with new element order. BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector<SDValue, 8> MaskV; + SmallVector<int, 8> MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); + MaskV.push_back(idx & 3); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + MaskV.push_back(i); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector<SDValue, 8> MaskV; + SmallVector<int, 8> MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); + MaskV.push_back(i); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); + MaskV.push_back((idx & 3) + 4); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3811,11 +3567,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // 2. [ssse3] 2 x pshufb + 1 x por // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector<SDValue, 16> MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + const int *Mask = SVOp->getMask(); SmallVector<int, 16> MaskVals; // If we have SSSE3, case 1 is generated when all result bytes come from @@ -3825,9 +3582,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast<ConstantSDNode>(Elt)->getZExtValue(); + int EltIdx = Mask[i]; MaskVals.push_back(EltIdx); if (EltIdx < 0) continue; @@ -3958,11 +3713,14 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, - MVT VT, - SDValue PermMask, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - unsigned NumElems = PermMask.getNumOperands(); +SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + MVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + const int *PermMask = SVOp->getMask(); + unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3981,38 +3739,35 @@ SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, else NewVT = MVT::v2f64; } - unsigned Scale = NumElems / NewWidth; - SmallVector<SDValue, 8> MaskVec; + int Scale = NumElems / NewWidth; + SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - unsigned StartIdx = ~0U; - for (unsigned j = 0; j < Scale; ++j) { - SDValue Elt = PermMask.getOperand(i+j); - if (Elt.getOpcode() == ISD::UNDEF) + int StartIdx = -1; + for (int j = 0; j < Scale; ++j) { + int EltIdx = PermMask[i+j]; + if (EltIdx < 0) continue; - unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); - if (StartIdx == ~0U) + if (StartIdx == -1) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == ~0U) - MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); + if (StartIdx == -1) + MaskVec.push_back(-1); else - MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); + MaskVec.push_back(StartIdx / Scale); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size())); + return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } /// getVZextMovL - Return a zero-extending vector move low node. /// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -4046,31 +3801,37 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. static SDValue -LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, - SDValue PermMask, MVT VT, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = PermMask.getValueType(); - MVT MaskEVT = MaskVT.getVectorElementType(); +LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + MVT VT = SVOp->getValueType(0); + const int *PermMaskPtr = SVOp->getMask(); + SmallVector<std::pair<int, int>, 8> Locs; Locs.resize(4); - SmallVector<SDValue, 8> Mask1(4, DAG.getUNDEF(MaskEVT)); + SmallVector<int, 8> Mask1(4U, -1); + SmallVector<int, 8> PermMask; + + for (unsigned i = 0; i != 8; ++i) + PermMask.push_back(PermMaskPtr[i]); + unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); } else { - unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); - assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Val < 4) { + assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Idx < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Elt; + Mask1[NumLo] = Idx; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Elt; + Mask1[2+NumHi] = Idx; NumHi++; } } @@ -4081,24 +3842,21 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask1[0], Mask1.size())); + V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - SmallVector<SDValue, 8> Mask2(4, DAG.getUNDEF(MaskEVT)); + SmallVector<int, 8> Mask2(4U, -1); + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = DAG.getConstant(Idx, MaskEVT); + Mask2[i] = Idx; } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask2[0], Mask2.size())); + return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -4109,60 +3867,51 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); + CommuteVectorShuffleMask(PermMask, VT); std::swap(V1, V2); } // Find the element from V2. unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - SDValue Elt = PermMask.getOperand(HiIndex); - if (Elt.getOpcode() == ISD::UNDEF) + int Val = PermMask[HiIndex]; + if (Val < 0) continue; - unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask.getOperand(HiIndex); - Mask1[1] = DAG.getUNDEF(MaskEVT); - Mask1[2] = PermMask.getOperand(HiIndex^1); - Mask1[3] = DAG.getUNDEF(MaskEVT); - V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[HiIndex]; + Mask1[1] = -1; + Mask1[2] = PermMask[HiIndex^1]; + Mask1[3] = -1; + V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); if (HiIndex >= 2) { - Mask1[0] = PermMask.getOperand(0); - Mask1[1] = PermMask.getOperand(1); - Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); - Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[0]; + Mask1[1] = PermMask[1]; + Mask1[2] = HiIndex & 1 ? 6 : 4; + Mask1[3] = HiIndex & 1 ? 4 : 6; + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); } else { - Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); - Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); - Mask1[2] = PermMask.getOperand(2); - Mask1[3] = PermMask.getOperand(3); - if (Mask1[2].getOpcode() != ISD::UNDEF) - Mask1[2] = - DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4, - MaskEVT); - if (Mask1[3].getOpcode() != ISD::UNDEF) - Mask1[3] = - DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4, - MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = HiIndex & 1 ? 2 : 0; + Mask1[1] = HiIndex & 1 ? 0 : 2; + Mask1[2] = PermMask[2]; + Mask1[3] = PermMask[3]; + if (Mask1[2] >= 0) + Mask1[2] += 4; + if (Mask1[3] >= 0) + Mask1[3] += 4; + return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } } // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); - SmallVector<SDValue,8> LoMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector<SDValue,8> HiMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector<SDValue,8> *MaskPtr = &LoMask; + SmallVector<int,8> LoMask(4U, -1); + SmallVector<int,8> HiMask(4U, -1); + + SmallVector<int,8> *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -4173,84 +3922,68 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, LoIdx = 0; HiIdx = 2; } - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); - } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) { + } else if (Idx < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Elt; + (*MaskPtr)[LoIdx] = Idx; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Elt; + (*MaskPtr)[HiIdx] = Idx; HiIdx++; } } - SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &LoMask[0], LoMask.size())); - SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &HiMask[0], HiMask.size())); - SmallVector<SDValue, 8> MaskOps; + SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); + SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); + SmallVector<int, 8> MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(DAG.getUNDEF(MaskEVT)); + MaskOps.push_back(-1); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); + MaskOps.push_back(Idx); } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskOps[0], MaskOps.size())); + return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PermMask.getNumOperands(); + const int *PermMask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); + unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - // FIXME: Check for legal shuffle and return? - - if (isUndefShuffle(Op.getNode())) - return DAG.getUNDEF(VT); - - if (isZeroShuffle(Op.getNode())) + if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - if (isIdentityMask(PermMask.getNode())) - return V1; - else if (isIdentityMask(PermMask.getNode(), true)) - return V2; - // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && - VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(PermMask.getNode())) - return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); + if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 && + X86::isMOVDDUPMask(SVOp)) + return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3()); - if (isSplatMask(PermMask.getNode())) { - if (isMMX || NumElems < 4) return Op; - // Promote it to a v4{if}32 splat. - return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); + // Promote splats to v4f32. + if (SVOp->isSplat()) { + if (isMMX || NumElems < 4) + return Op; + return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, - *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -4258,32 +3991,29 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) { - SDValue NewV1 = NewOp.getOperand(0); - SDValue NewV2 = NewOp.getOperand(1); - SDValue NewMask = NewOp.getOperand(2); - if (isCommutedMOVL(NewMask.getNode(), true, false)) { - NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); - return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, - dl); - } + if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false)) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp))) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, Subtarget, dl); } } - + + if (X86::isPSHUFDMask(SVOp)) + return Op; + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasSSE2() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4291,8 +4021,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(PermMask.getNode())) { + + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4300,17 +4030,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - - if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || - X86::isMOVSLDUPMask(PermMask.getNode()) || - X86::isMOVHLPSMask(PermMask.getNode()) || - X86::isMOVHPMask(PermMask.getNode()) || - X86::isMOVLPMask(PermMask.getNode()))) + + // FIXME: fold these into legal mask. + if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || + X86::isMOVSLDUPMask(SVOp) || + X86::isMOVHLPSMask(SVOp) || + X86::isMOVHPMask(SVOp) || + X86::isMOVLPMask(SVOp))) return Op; - if (ShouldXformToMOVHLPS(PermMask.getNode()) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) - return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (ShouldXformToMOVHLPS(SVOp) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) + return CommuteVectorShuffle(SVOp, DAG); if (isShift) { // No better options. Use a vshl / vsrl. @@ -4318,7 +4049,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4327,115 +4058,84 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Canonicalize the splat or undef, if present, to be on the RHS. if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + Op = CommuteVectorShuffle(SVOp, DAG); + SVOp = cast<ShuffleVectorSDNode>(Op); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - // FIXME: Figure out a cleaner way to do this. - if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { - if (V2IsUndef) return V1; - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (V2IsSplat) { - // V2 is a splat, so the mask may be malformed. That is, it may point - // to any V2 element. The instruction selectior won't like this. Get - // a corrected mask and commute to form a proper MOVS{S|D}. - SDValue NewMask = getMOVLMask(NumElems, DAG, dl); - if (NewMask.getNode() != PermMask.getNode()) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } - return Op; + if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + // Shuffling low element of v1 into undef, just return v1. + if (V2IsUndef) + return V1; + // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which + // the instruction selector will not match, so get a canonical MOVL with + // swapped operands to undo the commute. + return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) + if (X86::isUNPCKL_v_undef_Mask(SVOp) || + X86::isUNPCKH_v_undef_Mask(SVOp) || + X86::isUNPCKLMask(SVOp) || + X86::isUNPCKHMask(SVOp)) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. If match, return a // new vector_shuffle with the corrected mask. - SDValue NewMask = NormalizeMask(PermMask, DAG); - if (NewMask.getNode() != PermMask.getNode()) { - if (X86::isUNPCKLMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + SDValue NewMask = NormalizeMask(SVOp, DAG); + ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); + if (NSVOp != SVOp) { + if (X86::isUNPCKLMask(NSVOp, true)) { + return NewMask; + } else if (X86::isUNPCKHMask(NSVOp, true)) { + return NewMask; } } } - // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (Commuted) { // Commute is back and try unpck* again. - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) - return Op; + // FIXME: this seems wrong. + SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); + ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); + if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || + X86::isUNPCKH_v_undef_Mask(NewSVOp) || + X86::isUNPCKLMask(NewSVOp) || + X86::isUNPCKHMask(NewSVOp)) + return NewOp; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Try PSHUF* first, then SHUFP*. - // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically - // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. - if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, - DAG.getUNDEF(VT), PermMask); - return Op; - } - if (!isMMX) { - if (Subtarget->hasSSE2() && - (X86::isPSHUFDMask(PermMask.getNode()) || - X86::isPSHUFHWMask(PermMask.getNode()) || - X86::isPSHUFLWMask(PermMask.getNode()))) { - MVT RVT = VT; - if (VT == MVT::v4f32) { - RVT = MVT::v4i32; - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, - DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), - DAG.getUNDEF(RVT), PermMask); - } else if (V2.getOpcode() != ISD::UNDEF) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, - DAG.getUNDEF(RVT), PermMask); - if (RVT != VT) - Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); - return Op; - } - - // Binary or unary shufps. - if (X86::isSHUFPMask(PermMask.getNode()) || - (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) - return Op; - } + // Normalize the node to match x86 shuffle ops if needed + if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + return CommuteVectorShuffle(SVOp, DAG); + // Check for legal shuffle and return? + if (isShuffleMaskLegal(PermMask, VT)) + return Op; + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); + return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); } @@ -4529,22 +4229,12 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; + // SHUFPS the element to the lowest double word, then movss. - MVT MaskVT = MVT::getIntVectorWithNumElements(4); - SmallVector<SDValue, 8> IdxVec; - IdxVec. - push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), Mask); + int Mask[4] = { Idx, -1, -1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4558,17 +4248,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - MVT MaskVT = MVT::getIntVectorWithNumElements(2); - SmallVector<SDValue, 8> IdxVec; - IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), - Mask); + int Mask[2] = { 1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -5075,19 +4758,6 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); - SmallVector<SDValue, 4> MaskVec; - MaskVec.push_back(DAG.getConstant(0, MVT::i32)); - MaskVec.push_back(DAG.getConstant(4, MVT::i32)); - MaskVec.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec.push_back(DAG.getConstant(5, MVT::i32)); - SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &MaskVec[0], MaskVec.size()); - SmallVector<SDValue, 4> MaskVec2; - MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, - &MaskVec2[0], MaskVec2.size()); - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -5096,13 +4766,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - XR1, XR2, UnpcklMask); + SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - Unpck1, CLod0, UnpcklMask); + SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -5110,8 +4778,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, - Sub, Sub, ShufMask); + int ShufMask[2] = { 1, -1 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, + DAG.getUNDEF(MVT::v2f64), ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -7263,34 +6932,36 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(const int *Mask, MVT VT) const { // Only do shuffles on 128-bit vector types for now. - // FIXME: pshufb, blends - if (VT.getSizeInBits() == 64) return false; - return (Mask.getNode()->getNumOperands() <= 4 || - isIdentityMask(Mask.getNode()) || - isIdentityMask(Mask.getNode(), true) || - isSplatMask(Mask.getNode()) || - X86::isPSHUFHWMask(Mask.getNode()) || - X86::isPSHUFLWMask(Mask.getNode()) || - X86::isUNPCKLMask(Mask.getNode()) || - X86::isUNPCKHMask(Mask.getNode()) || - X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || - X86::isUNPCKH_v_undef_Mask(Mask.getNode())); + if (VT.getSizeInBits() == 64) + return false; + + // FIXME: pshufb, blends, palignr, shifts. + return (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(Mask, VT) || + isMOVLMask(Mask, VT) || + isSHUFPMask(Mask, VT) || + isPSHUFDMask(Mask, VT) || + isPSHUFHWMask(Mask, VT) || + isPSHUFLWMask(Mask, VT) || + isUNPCKLMask(Mask, VT) || + isUNPCKHMask(Mask, VT) || + isUNPCKL_v_undef_Mask(Mask, VT) || + isUNPCKH_v_undef_Mask(Mask, VT)); } bool -X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, - MVT EVT, SelectionDAG &DAG) const { - unsigned NumElts = BVOps.size(); - // Only do shuffles on 128-bit vector types for now. - if (EVT.getSizeInBits() * NumElts == 64) return false; - if (NumElts == 2) return true; - if (NumElts == 4) { - return (isMOVLMask(&BVOps[0], 4) || - isCommutedMOVL(&BVOps[0], 4, true) || - isSHUFPMask(&BVOps[0], 4) || - isCommutedSHUFP(&BVOps[0], 4)); +X86TargetLowering::isVectorClearMaskLegal(const int *Mask, MVT VT) const { + unsigned NumElts = VT.getVectorNumElements(); + // FIXME: This collection of masks seems suspect. + if (NumElts == 2) + return true; + if (NumElts == 4 && VT.getSizeInBits() == 128) { + return (isMOVLMask(Mask, VT) || + isCommutedMOVLMask(Mask, VT, true) || + isSHUFPMask(Mask, VT) || + isCommutedSHUFPMask(Mask, VT)); } return false; } @@ -8025,15 +7696,14 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, +static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, unsigned NumElems, MVT EVT, SDNode *&Base, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { Base = NULL; for (unsigned i = 0; i < NumElems; ++i) { - SDValue Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) { + if (PermMask[i] < 0) { if (!Base) return false; continue; @@ -8066,12 +7736,12 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - SDValue PermMask = N->getOperand(2); - unsigned NumElems = PermMask.getNumOperands(); + const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask(); + unsigned NumElems = VT.getVectorNumElements(); // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 // where the upper half is 0, it is advantageous to rewrite it as a build @@ -8080,9 +7750,10 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, SDValue In[2]; In[0] = N->getOperand(0); In[1] = N->getOperand(1); - unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue(); - unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue(); - if (In[0].getValueType().getVectorNumElements() == NumElems && + unsigned Idx0 = PermMask[0]; + unsigned Idx1 = PermMask[1]; + // FIXME: can we take advantage of undef index? + if (PermMask[0] >= 0 && PermMask[1] >= 0 && In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { ConstantSDNode* InsertVecIdx = @@ -8546,9 +8217,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - isSplatMask(ShAmtOp.getOperand(2).getNode())) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); |