diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-06 13:11:09 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-06 13:11:09 +0000 |
commit | 714319a169784577e33fb1ea28ac06be32c9e735 (patch) | |
tree | 44e27f9869b10312a7f5d319d450fb743b0fddff | |
parent | 26ba5df2ebc123b1d214a5e7334c650e90d3738a (diff) | |
download | external_llvm-714319a169784577e33fb1ea28ac06be32c9e735.zip external_llvm-714319a169784577e33fb1ea28ac06be32c9e735.tar.gz external_llvm-714319a169784577e33fb1ea28ac06be32c9e735.tar.bz2 |
AVX-512: added scalar convert instructions and intrinsics.
Fixed load folding in VPERM2I instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192063 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IR/IntrinsicsX86.td | 30 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 182 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 9 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-cvt.ll | 31 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-intrinsics.ll | 58 |
6 files changed, 308 insertions, 13 deletions
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 34f43c3..bd22e1b 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -206,6 +206,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], [IntrNoMem]>; + // avx-512 for unsigned conversion + def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">, Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">, @@ -484,6 +500,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty], [IntrNoMem]>; def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7222754..8618853 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1330,7 +1330,16 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FMA, MVT::v16f32, Legal); setOperationAction(ISD::SDIV, MVT::v16i32, Custom); - + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); + } setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index e87fbca..038f8d8 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2215,42 +2215,208 @@ multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + EVEX_4V; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + EVEX_4V; } // neverHasSideEffects = 1 } defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, +defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, +defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; + (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; + (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp GR32:$src)), (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f32 (sint_to_fp GR64:$src)), - (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; + (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; def : Pat<(f64 (sint_to_fp GR32:$src)), (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f64 (sint_to_fp GR64:$src)), - (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; +defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">, + XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">, + XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">, + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">, + XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), + (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))), + (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))), + (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))), + (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; + +def : Pat<(f32 (uint_to_fp GR32:$src)), + (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f32 (uint_to_fp GR64:$src)), + (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; +def : Pat<(f64 (uint_to_fp GR32:$src)), + (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f64 (uint_to_fp GR64:$src)), + (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + + +//===----------------------------------------------------------------------===// +// AVX-512 Scalar convert from float/double to integer +//===----------------------------------------------------------------------===// +multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, Operand memop, ComplexPattern mem_cpat, + string asm> { +let neverHasSideEffects = 1 in { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG; + let mayLoad = 1 in + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG; +} // neverHasSideEffects = 1 +} +let Predicates = [HasAVX512] in { +// Convert float/double to signed/unsigned int 32/64 +defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{z}">, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{z}">, + XS, VEX_W, EVEX_CD8<32, CD8VT1>; +defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, + ssmem, sse_load_f32, "cvtss2usi{z}">, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, + int_x86_avx512_cvtss2usi64, ssmem, + sse_load_f32, "cvtss2usi{z}">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; +defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, + sdmem, sse_load_f64, "cvtsd2si{z}">, + XD, EVEX_CD8<64, CD8VT1>; +defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, + sdmem, sse_load_f64, "cvtsd2si{z}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, + sdmem, sse_load_f64, "cvtsd2usi{z}">, + XD, EVEX_CD8<64, CD8VT1>; +defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, + int_x86_avx512_cvtsd2usi64, sdmem, + sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; + +defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V; +defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; +defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V; +defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; + +defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V; +defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; +defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V; +defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; + +// Convert float/double to signed/unsigned int 32/64 with truncation +defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, + ssmem, sse_load_f32, "cvttss2si{z}">, + XS, EVEX_CD8<32, CD8VT1>; +defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{z}">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; +defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, + sdmem, sse_load_f64, "cvttsd2si{z}">, XD, + EVEX_CD8<64, CD8VT1>; +defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{z}">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; +defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, + int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, + "cvttss2si{z}">, XS, EVEX_CD8<32, CD8VT1>; +defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, + int_x86_avx512_cvttss2usi64, ssmem, + sse_load_f32, "cvttss2usi{z}">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; +defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, + int_x86_avx512_cvttsd2usi, + sdmem, sse_load_f64, "cvttsd2usi{z}">, XD, + EVEX_CD8<64, CD8VT1>; +defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, + int_x86_avx512_cvttsd2usi64, sdmem, + sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; +} + +multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX; +} + +defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, + loadf32, "cvttss2si{z}">, XS, + EVEX_CD8<32, CD8VT1>; +defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, + loadf32, "cvttss2usi{z}">, XS, + EVEX_CD8<32, CD8VT1>; +defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, + loadf32, "cvttss2si{z}">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; +defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, + loadf32, "cvttss2usi{z}">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; +defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, + loadf64, "cvttsd2si{z}">, XD, + EVEX_CD8<64, CD8VT1>; +defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, + loadf64, "cvttsd2usi{z}">, XD, + EVEX_CD8<64, CD8VT1>; +defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, + loadf64, "cvttsd2si{z}">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; +defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, + loadf64, "cvttsd2usi{z}">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 20b42d7..277e043 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1232,10 +1232,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, - { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, - { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, - { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, - { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, @@ -1425,6 +1421,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, + // AVX-512 VPERMI instructions with 3 source operands. + { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, + { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, + { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, + { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 543bb5e..bbad507 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -184,3 +184,34 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind { ret <16 x float> %b } +; CHECK-LABEL: @fptosi02 +; CHECK vcvttss2siz +; CHECK: ret +define i32 @fptosi02(float %a) nounwind { + %b = fptosi float %a to i32 + ret i32 %b +} + +; CHECK-LABEL: @fptoui02 +; CHECK vcvttss2usiz +; CHECK: ret +define i32 @fptoui02(float %a) nounwind { + %b = fptoui float %a to i32 + ret i32 %b +} + +; CHECK-LABEL: @uitofp02 +; CHECK vcvtusi2ss +; CHECK: ret +define float @uitofp02(i32 %a) nounwind { + %b = uitofp i32 %a to float + ret float %b +} + +; CHECK-LABEL: @uitofp03 +; CHECK vcvtusi2sd +; CHECK: ret +define double @uitofp03(i32 %a) nounwind { + %b = uitofp i32 %a to double + ret double %b +} diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index c0ac719..dc2ab85 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -86,3 +86,61 @@ define <2 x double> @test_x86_avx3_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone +define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2siz + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sdqz + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + +define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtusi2sdqz + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttsd2siz + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2siz + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ssqz + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2siz + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + +define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { + ; CHECK: vcvtsd2usiz + %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone |