diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-05-03 07:12:59 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-05-03 07:12:59 +0000 |
commit | 6b28d356c56d656e8e4d23c71de80162bb2eba5e (patch) | |
tree | de4e753e474ece575a7da13fb040fe4bc7811a4f /lib | |
parent | d99d68bcee8ce91a18c397756c702363e030fd83 (diff) | |
download | external_llvm-6b28d356c56d656e8e4d23c71de80162bb2eba5e.zip external_llvm-6b28d356c56d656e8e4d23c71de80162bb2eba5e.tar.gz external_llvm-6b28d356c56d656e8e4d23c71de80162bb2eba5e.tar.bz2 |
Fix 256-bit vpshuflw and vpshufhw immediate encoding to handle undefs in the lower half correctly. Missed in r155982.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156059 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 51 |
2 files changed, 36 insertions, 21 deletions
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index a1f2424..f0d9467 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -82,8 +82,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { void DecodePSHUFHWMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumElts = 8 * NumLanes; + unsigned NumElts = VT.getVectorNumElements(); for (unsigned l = 0; l != NumElts; l += 8) { unsigned NewImm = Imm; @@ -99,8 +98,7 @@ void DecodePSHUFHWMask(EVT VT, unsigned Imm, void DecodePSHUFLWMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumElts = 8 * NumLanes; + unsigned NumElts = VT.getVectorNumElements(); for (unsigned l = 0; l != NumElts; l += 8) { unsigned NewImm = Imm; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bdbdaf5..cacdb52 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3904,9 +3904,8 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Elt = N->getMaskElt(i); if (Elt < 0) continue; - Elt %= NumLaneElts; - unsigned ShAmt = i << Shift; - if (ShAmt >= 8) ShAmt -= 8; + Elt &= NumLaneElts - 1; + unsigned ShAmt = (i << Shift) % 8; Mask |= Elt << ShAmt; } @@ -3916,30 +3915,48 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + assert((VT == MVT::v8i16 || VT == MVT::v16i16) && + "Unsupported vector type for PSHUFHW"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned Mask = 0; - // 8 nodes, but we only care about the last 4. - for (unsigned i = 7; i >= 4; --i) { - int Val = N->getMaskElt(i); - if (Val >= 0) - Mask |= (Val - 4); - if (i != 4) - Mask <<= 2; + for (unsigned l = 0; l != NumElts; l += 8) { + // 8 nodes per lane, but we only care about the last 4. + for (unsigned i = 0; i < 4; ++i) { + int Elt = N->getMaskElt(l+i+4); + if (Elt < 0) continue; + Elt &= 0x3; // only 2-bits. + Mask |= Elt << (i * 2); + } } + return Mask; } /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + assert((VT == MVT::v8i16 || VT == MVT::v16i16) && + "Unsupported vector type for PSHUFHW"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned Mask = 0; - // 8 nodes, but we only care about the first 4. - for (int i = 3; i >= 0; --i) { - int Val = N->getMaskElt(i); - if (Val >= 0) - Mask |= Val; - if (i != 0) - Mask <<= 2; + for (unsigned l = 0; l != NumElts; l += 8) { + // 8 nodes per lane, but we only care about the first 4. + for (unsigned i = 0; i < 4; ++i) { + int Elt = N->getMaskElt(l+i); + if (Elt < 0) continue; + Elt &= 0x3; // only 2-bits + Mask |= Elt << (i * 2); + } } + return Mask; } |