| | | |
|---|---|---|
| author | Craig Topper <craig.topper@gmail.com> | 2011-11-20 00:12:05 +0000 |
| committer | Craig Topper <craig.topper@gmail.com> | 2011-11-20 00:12:05 +0000 |
| commit | 0d86d462f8458b803d5209a77961dc63d5a9dae0 (patch) | |
| tree | 935eac090efbcfb131d86162de66cb003c9ba45c /lib/Target | |
| parent | 745a86bac9684f9617aeb0e1566194ca797a64d4 (diff) | |
Add code for lowering v32i8 shifts by a splat to AVX2 immediate shift instructions. Remove 256-bit splat handling from LowerShift as it was already handled by PerformShiftCombine.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145005 91177308-0d34-0410-b5e6-96231b3b80d8
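AVX2 has no byte-granularity immediate shift instructions (there is no vpsllb/vpsrlb/vpsrab), which is why this lowering builds v32i8 shifts from 16-bit immediate shifts plus a byte mask, and derives the arithmetic shift from the logical one. The following is a minimal standalone C++ check of the three identities the patch relies on; it is not part of the commit, a single 16-bit lane stands in for one word lane of the YMM register, and the reference expression assumes the usual two's-complement arithmetic right shift:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // One 16-bit lane holding two packed bytes (hi:lo), every splat amount 1..7.
  for (unsigned a = 1; a < 8; ++a) {
    for (unsigned w = 0; w < 0x10000; ++w) {
      uint8_t lo = uint8_t(w), hi = uint8_t(w >> 8);

      // SHL: shift the whole word, then clear the low 'a' bits of each byte,
      // which were shifted in from the byte below (psllw + pand).
      uint16_t shl = uint16_t(w << a);
      uint8_t shl_mask = uint8_t(-1U << a);
      assert(uint8_t(shl & shl_mask) == uint8_t(lo << a));
      assert(uint8_t((shl >> 8) & shl_mask) == uint8_t(hi << a));

      // SRL: shift the whole word, then clear the high 'a' bits of each byte,
      // which were shifted in from the byte above (psrlw + pand).
      uint16_t srl = uint16_t(w >> a);
      uint8_t srl_mask = uint8_t(-1U) >> a;
      assert(uint8_t(srl & srl_mask) == uint8_t(lo >> a));
      assert(uint8_t((srl >> 8) & srl_mask) == uint8_t(hi >> a));

      // SRA: R s>> a == ((R u>> a) ^ m) - m with m = 128 >> a; the xor/sub
      // pair sign-extends the logically shifted byte.
      uint8_t m = uint8_t(128 >> a);
      assert(uint8_t(((lo >> a) ^ m) - m) == uint8_t(int8_t(lo) >> a));
    }
  }
  return 0;
}
```

The ShiftAmt == 7 arithmetic case needs no mask at all: shifting a byte right arithmetically by 7 yields 0 or -1 depending on the sign bit, which is exactly a signed compare-greater-than against zero, hence the PCMPGTB special case in the patch.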
Diffstat (limited to 'lib/Target')
| | path | lines |
|---|---|---|
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 81 |
| -rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 9 |

2 files changed, 45 insertions, 45 deletions
```diff
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 61d9246..470a115 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10338,47 +10338,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
         return Res;
       }
 
-      if (Subtarget->hasAVX2()) {
-        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA)
-          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
+      if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+        if (Op.getOpcode() == ISD::SHL) {
+          // Make a large shift.
+          SDValue SHL =
+            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                        DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
+                        R, DAG.getConstant(ShiftAmt, MVT::i32));
+          // Zero out the rightmost bits.
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
+                                                         MVT::i8));
+          return DAG.getNode(ISD::AND, dl, VT, SHL,
+                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
         }
+        if (Op.getOpcode() == ISD::SRL) {
+          // Make a large shift.
+          SDValue SRL =
+            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                        DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
+                        R, DAG.getConstant(ShiftAmt, MVT::i32));
+          // Zero out the leftmost bits.
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+                                                         MVT::i8));
+          return DAG.getNode(ISD::AND, dl, VT, SRL,
+                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+        }
+        if (Op.getOpcode() == ISD::SRA) {
+          if (ShiftAmt == 7) {
+            // R s>> 7  ===  R s< 0
+            SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+            return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+          }
+
+          // R s>> a === ((R u>> a) ^ m) - m
+          SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+                                                         MVT::i8));
+          SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+          Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+          Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+          return Res;
+        }
+      }
     }
   }
 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1e74da1..68010b7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -311,17 +311,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
 // JIT implementation, it does not expand the instructions below like
 // X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+  let Predicates = [HasAVX] in
 def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX2] in
+  let Predicates = [HasAVX2] in
 def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
                           [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+}
 
 //===----------------------------------------------------------------------===//
```
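For readers who think in intrinsics rather than SelectionDAG nodes, the instruction sequences this lowering selects correspond roughly to the sketch below. It is my own illustration, not part of the commit: the helper names are made up, the shift amount is assumed to be a compile-time constant splat in the range 1..7, and the code assumes an AVX2 target (compile with -mavx2 or equivalent).

```cpp
#include <immintrin.h>

// v32i8 shift by a constant splat Amt: a 16-bit immediate shift
// (vpsllw/vpsrlw) plus a byte mask, and the xor/sub trick
// (or vpcmpgtb for Amt == 7) for the arithmetic shift.
template <int Amt>                       // assume 1 <= Amt <= 7
__m256i shl_v32i8(__m256i r) {
  const __m256i mask = _mm256_set1_epi8((char)((0xFFu << Amt) & 0xFF));
  return _mm256_and_si256(_mm256_slli_epi16(r, Amt), mask);  // vpsllw + vpand
}

template <int Amt>
__m256i srl_v32i8(__m256i r) {
  const __m256i mask = _mm256_set1_epi8((char)(0xFFu >> Amt));
  return _mm256_and_si256(_mm256_srli_epi16(r, Amt), mask);  // vpsrlw + vpand
}

template <int Amt>
__m256i sra_v32i8(__m256i r) {
  if (Amt == 7)  // r s>> 7 == (0 > r), per byte
    return _mm256_cmpgt_epi8(_mm256_setzero_si256(), r);     // vpcmpgtb
  const __m256i m = _mm256_set1_epi8((char)(128 >> Amt));
  __m256i res = srl_v32i8<Amt>(r);                           // r u>> Amt
  res = _mm256_xor_si256(res, m);                            // flip moved sign bit
  return _mm256_sub_epi8(res, m);                            // ((r u>> a) ^ m) - m
}
```

The removed v4i64/v8i32/v16i16 cases are not lost: as the commit message notes, PerformShiftCombine already turns 256-bit shifts by a splat of those types into the corresponding AVX2 immediate shift nodes.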