aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorMon P Wang <wangmp@apple.com>2008-12-18 21:42:19 +0000
committerMon P Wang <wangmp@apple.com>2008-12-18 21:42:19 +0000
commit14edb09b3c70ade8dbd86dad07fcbe9486cca534 (patch)
tree0a478992f7ba11f6e07f040aee53d7513ba263f0 /lib
parentd53ce0b7c43541159f78b42c9f822f69c3d432cc (diff)
downloadexternal_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.zip
external_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.tar.gz
external_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.tar.bz2
Fixed x86 code generation of multiply for v2i64. It was incorrect for SSE4.1.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61211 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp47
-rw-r--r--lib/Target/X86/X86ISelLowering.h1
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1
-rw-r--r--lib/Target/X86/X86InstrSSE.td6
4 files changed, 49 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a9a2def..6ad9bd2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
@@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE41()) {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::MUL, MVT::v2i64, Legal);
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
@@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ // Custom-lower a v2i64 MUL into SSE2 intrinsic nodes, mirroring this sequence:
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ // Both cross products are shifted left by 32 before the three terms are summed:
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
+ // No Ahi*Bhi product is formed; only the three pmuludq results are added.
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo);
+ return Res;
+}
+
+
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6ff1b99..5619e94 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -597,6 +597,7 @@ namespace llvm {
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9e60a0f..b66cbd1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMINSWrr, X86::PMINSWrm },
{ X86::PMINUBrr, X86::PMINUBrm },
{ X86::PMULDQrr, X86::PMULDQrm },
- { X86::PMULDQrr_int, X86::PMULDQrm_int },
{ X86::PMULHUWrr, X86::PMULHUWrm },
{ X86::PMULHWrr, X86::PMULHWrm },
{ X86::PMULLDrr, X86::PMULLDrm },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 576c7e6..90b2231 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3313,12 +3313,13 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
int_x86_sse41_pmaxuw, 1>;
+defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
+
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
(PCMPEQQrm VR128:$src1, addr:$src2)>;
-
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
@@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in {
}
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
int_x86_sse41_pmulld, 1>;
-defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
- int_x86_sse41_pmuldq, 1>;
-
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {