aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lang Hames <lhames@gmail.com> 2011-12-17 01:08:46 +0000
committer Lang Hames <lhames@gmail.com> 2011-12-17 01:08:46 +0000
commit 8b99c1e42cff9a55996907901a5ad81a97f24912 (patch)
tree 425deee442ab128f076c0e317cde4269ba72b1ef
parent 2027379985f1cbb965be808adad5b819a66dd97f (diff)
download external_llvm-8b99c1e42cff9a55996907901a5ad81a97f24912.zip
external_llvm-8b99c1e42cff9a55996907901a5ad81a97f24912.tar.gz
external_llvm-8b99c1e42cff9a55996907901a5ad81a97f24912.tar.bz2
Make sure that the lower bits on the VSELECT condition are properly set.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146800 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 52
-rw-r--r-- test/CodeGen/X86/2011-12-15-vec_shift.ll 15
2 files changed, 40 insertions, 27 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a7b38a5..0cf0bd9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10168,48 +10168,54 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
+ assert((Subtarget->hasSSE2() || Subtarget->hasAVX()) &&
+ "Need SSE2 for pslli/pcmpeq.");
+
// a = a << 5;
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
Op.getOperand(1), DAG.getConstant(5, MVT::i32));
- ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
- ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+ // Turn 'a' into a mask suitable for VSELECT
+ SDValue VSelM = DAG.getConstant(0x80, VT);
+ SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
+ OpVSel, VSelM);
- std::vector<Constant*> CVM1(16, CM1);
- std::vector<Constant*> CVM2(16, CM2);
- Constant *C = ConstantVector::get(CVM1);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue CM1 = DAG.getConstant(0x0f, VT);
+ SDValue CM2 = DAG.getConstant(0x3f, VT);
- // r = pblendv(r, psllw(r & (char16)15, 4), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ // r = VSELECT(r, psllw(r & (char16)15, 4), a);
+ SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
+ OpVSel, VSelM);
- C = ConstantVector::get(CVM2);
- CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
-
- // r = pblendv(r, psllw(r & (char16)63, 2), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ // r = VSELECT(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
+ OpVSel, VSelM);
- // return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
+ // return VSELECT(r, r+r, a);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index 2b98b5a..6f9188c 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,12 +1,19 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
; Test case for r146671
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7"
define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
- ; CHECK: psllw $4, [[REG:%xmm.]]
- ; CHECK-NEXT: movdqa
- ; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}}
+ ; Make sure operands to pblend are in the right order.
+ ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+ ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+ ; CHECK-W-SSE4: psllw $2
+
+ ; Make sure we're masking and pcmp'ing the VSELECT condition vector.
+ ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+ ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+ ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
%1 = shl <16 x i8> %a, %b
ret <16 x i8> %1
}