diff options
author | Mon P Wang <wangmp@apple.com> | 2008-12-18 21:42:19 +0000 |
---|---|---|
committer | Mon P Wang <wangmp@apple.com> | 2008-12-18 21:42:19 +0000 |
commit | 14edb09b3c70ade8dbd86dad07fcbe9486cca534 (patch) | |
tree | 0a478992f7ba11f6e07f040aee53d7513ba263f0 | |
parent | d53ce0b7c43541159f78b42c9f822f69c3d432cc (diff) | |
download | external_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.zip external_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.tar.gz external_llvm-14edb09b3c70ade8dbd86dad07fcbe9486cca534.tar.bz2 |
Fixed x86 code generation of multiply for v2i64. It was incorrect for SSE4.1.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61211 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 47 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 6 |
4 files changed, 49 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a9a2def..6ad9bd2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); setOperationAction(ISD::ADD, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v4i32, Legal); @@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE41()) { // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); - setOperationAction(ISD::MUL, MVT::v2i64, Legal); // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. 
f32 vectors are @@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { return Op; } +SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); + + // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32); + // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32); + // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b ); + // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi ); + // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b ); + // + // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 ); + // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 ); + // return AloBlo + AloBhi + AhiBlo; + + SDValue A = Op.getOperand(0); + SDValue B = Op.getOperand(1); + + SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), + A, DAG.getConstant(32, MVT::i32)); + SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), + B, DAG.getConstant(32, MVT::i32)); + SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), + A, B); + SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), + A, Bhi); + SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), + Ahi, B); + AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), + AloBhi, DAG.getConstant(32, MVT::i32)); + AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), + AhiBlo, DAG.getConstant(32, MVT::i32)); + SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi); + Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo); + return Res; +} + + SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { // Lower 
the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering @@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); + case ISD::MUL: return LowerMUL_V2I64(Op, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6ff1b99..5619e94 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -597,6 +597,7 @@ namespace llvm { SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); + SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG); SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9e60a0f..b66cbd1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMINSWrr, X86::PMINSWrm }, { X86::PMINUBrr, X86::PMINUBrm }, { X86::PMULDQrr, X86::PMULDQrm }, - { X86::PMULDQrr_int, X86::PMULDQrm_int }, { X86::PMULHUWrr, X86::PMULHUWrm }, { X86::PMULHWrr, X86::PMULHWrm }, { X86::PMULLDrr, X86::PMULLDrm }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 576c7e6..90b2231 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3313,12 +3313,13 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw, 1>; +defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), 
(PCMPEQQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), (PCMPEQQrm VR128:$src1, addr:$src2)>; - /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator let Constraints = "$src1 = $dst" in { multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT, @@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in { } defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul, int_x86_sse41_pmulld, 1>; -defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul, - int_x86_sse41_pmuldq, 1>; - /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate let Constraints = "$src1 = $dst" in { |