From f58e4144054b85e855c57c86eb058a6bb1907552 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 14 Nov 2013 11:29:27 +0000 Subject: AVX-512: Handled extractelement from mask vector; Added VMOSHDUP/VMOVSLDUP shuffle instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194691 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 43 ++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrAVX512.td | 33 +++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 6 ++++-- 3 files changed, 80 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6df0fd8..a878ea8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16323,6 +16323,44 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +/// Extract one bit from mask vector, like v16i1 or v8i1. +/// AVX-512 feature. +static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) { + SDValue Vec = N->getOperand(0); + SDLoc dl(Vec); + MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = N->getOperand(1); + MVT EltVT = N->getSimpleValueType(0); + + assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) || + "Unexpected operands in ExtractBitFromMaskVector"); + + // variable index + if (!isa(Idx)) { + MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); + SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + ExtVT.getVectorElementType(), Ext); + return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + + unsigned IdxVal = cast(Idx)->getZExtValue(); + + MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits()); + unsigned MaxShift = VecVT.getSizeInBits() - 1; + Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec); + Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec, + DAG.getConstant(MaxShift - IdxVal, ScalarVT)); + Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec, + DAG.getConstant(MaxShift, ScalarVT)); + + if (VecVT == MVT::v16i1) { + Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec); + } + return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements. @@ -16333,6 +16371,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return NewOp; SDValue InputVector = N->getOperand(0); + + if (InputVector.getValueType().getVectorElementType() == MVT::i1 && + !DCI.isBeforeLegalize()) + return ExtractBitFromMaskVector(N, DAG); + // Detect whether we are trying to convert from mmx to i32 and the bitcast // from mmx to v2i32 has a single usage. if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST && diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8935f90..cb19fbd 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2075,6 +2075,38 @@ defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), (VMOVDDUPZrm addr:$src)>; +//===---------------------------------------------------------------------===// +// Replicate Single FP - MOVSHDUP and MOVSLDUP +//===---------------------------------------------------------------------===// +multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, + ValueType vt, RegisterClass RC, PatFrag mem_frag, + X86MemOperand x86memop> { + def rr : AVX512XSI, EVEX; + let mayLoad = 1 in + def rm : AVX512XSI, EVEX; +} + +defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>; +def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))), + (VMOVSHDUPZrm addr:$src)>; +def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>; +def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), + (VMOVSLDUPZrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// Move Low to High and High to Low packed FP Instructions +//===----------------------------------------------------------------------===// def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -3385,6 +3417,7 @@ multiclass avx512_alignr, EVEX_4V; + let mayLoad = 1 in def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), !strconcat(OpcodeStr, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2351cff..0a668a8 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3165,7 +3165,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; case 16: { - assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); + assert((X86::VR128RegClass.hasSubClassEq(RC) || + X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) return load ? @@ -3177,7 +3178,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); } case 32: - assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); + assert((X86::VR256RegClass.hasSubClassEq(RC) || + X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; -- cgit v1.1