diff options
author | David Greene <greened@obbligato.org> | 2011-02-28 19:06:56 +0000 |
---|---|---|
committer | David Greene <greened@obbligato.org> | 2011-02-28 19:06:56 +0000 |
commit | c4db4e5105ccd82df19f141957511f735a9be2d0 (patch) | |
tree | 3b2142f2631361a7b5b122c2151512b011024485 | |
parent | d436d5b1c993c78b6c267b5fc1056d593f07921f (diff) | |
download | external_llvm-c4db4e5105ccd82df19f141957511f735a9be2d0.zip external_llvm-c4db4e5105ccd82df19f141957511f735a9be2d0.tar.gz external_llvm-c4db4e5105ccd82df19f141957511f735a9be2d0.tar.bz2 |
[AVX] Add decode support for VUNPCKLPS/D instructions, both 128-bit
and 256-bit forms. Because the number of elements in a vector
does not determine the vector type (4 elements could be v4f32 or
v4f64), pass the full type of the vector to decode routines.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126664 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/InstPrinter/X86InstComments.cpp | 40 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 47 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.h | 26 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 37 |
4 files changed, 120 insertions, 30 deletions
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 12144e3..c642acc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(16, ShuffleMask); + DecodePUNPCKLBWMask(16, ShuffleMask); break; case X86::PUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(8, ShuffleMask); + DecodePUNPCKLWDMask(8, ShuffleMask); break; case X86::PUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(4, ShuffleMask); + DecodePUNPCKLDQMask(4, ShuffleMask); break; case X86::PUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(2, ShuffleMask); + DecodePUNPCKLQDQMask(2, ShuffleMask); break; case X86::SHUFPDrri: @@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(2, ShuffleMask); + DecodeUNPCKLPDMask(2, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPDrm: + DecodeUNPCKLPDMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPDYrm: + DecodeUNPCKLPDMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(4, ShuffleMask); + DecodeUNPCKLPSMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSrm: + DecodeUNPCKLPSMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSYrm: + DecodeUNPCKLPSMask(8, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1287977..76f4d3b 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -1,4 +1,4 @@ -//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// // // The LLVM Compiler Infrastructure // @@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); +} + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); +} + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); - ShuffleMask.push_back(i+NElts); - } + DecodeUNPCKLPMask(VT, ShuffleMask); } void DecodePUNPCKHMask(unsigned NElts, @@ -133,12 +150,24 @@ void DecodeUNPCKHPMask(unsigned NElts, } } +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { + + int NElts = VT.getVectorNumElements(); + for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(i); // Reads from dest ShuffleMask.push_back(i+NElts); // Reads from src diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 50d9ccb..b18f670 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -16,6 +16,7 @@ #define X86_SHUFFLE_DECODE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ValueTypes.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding @@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); void DecodePUNPCKHMask(unsigned NElts, @@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeUNPCKHPMask(unsigned NElts, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2f49dbc..09ec69d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3895,11 +3895,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: - DecodePUNPCKLMask(NumElems, ShuffleMask); + DecodePUNPCKLMask(VT, ShuffleMask); break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - DecodeUNPCKLPMask(NumElems, ShuffleMask); + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: + DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, ShuffleMask); @@ -5263,6 +5267,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); + Locs.resize(4); SmallVector<int,8> LoMask(4U, -1); SmallVector<int,8> HiMask(4U, -1); @@ -5508,12 +5513,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v4f32: + return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; + case MVT::v2f64: + return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; default: @@ -5641,7 +5650,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // unpckh_undef). Only use pshufd if speed is more important than size. if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -5762,7 +5771,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5789,7 +5799,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); @@ -5812,8 +5823,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + if (VT == MVT::v2f64) { + X86ISD::NodeType Opcode = + getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); + } if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -5840,7 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V1, DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); |