diff options
author | Jan Sjödin <jan_sjodin@yahoo.com> | 2012-01-11 15:20:20 +0000 |
---|---|---|
committer | Jan Sjödin <jan_sjodin@yahoo.com> | 2012-01-11 15:20:20 +0000 |
commit | 46df3adb4e12e7f607a5bd21335311604834ba7e (patch) | |
tree | 1b2e0c130d06b57888461f9c5da463609507acc1 | |
parent | 394a1f53b90698486ac7c75724a6bda349cd0353 (diff) | |
download | external_llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.zip external_llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.tar.gz external_llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.tar.bz2 |
Add XOP Intrinsics and tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147949 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 529 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrXOP.td | 735 | ||||
-rw-r--r-- | test/CodeGen/X86/xop-intrinsics-x86_64.ll | 1059 |
3 files changed, 2250 insertions, 73 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 2d5d9ff..0a14d66 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1960,6 +1960,535 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// +// XOP + + def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vpermil2pd_256 : + GCCBuiltin<"__builtin_ia32_vpermil2pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpermil2ps_256 : + GCCBuiltin<"__builtin_ia32_vpermil2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vfrcz_pd : + GCCBuiltin<"__builtin_ia32_vfrczpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ps : + GCCBuiltin<"__builtin_ia32_vfrczps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_sd : + GCCBuiltin<"__builtin_ia32_vfrczsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_xop_vfrcz_ss : + GCCBuiltin<"__builtin_ia32_vfrczss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_xop_vfrcz_pd_256 : + GCCBuiltin<"__builtin_ia32_vfrczpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ps_256 : + GCCBuiltin<"__builtin_ia32_vfrczps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_xop_vpcmov : + GCCBuiltin<"__builtin_ia32_vpcmov">, + 
Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v2di : + GCCBuiltin<"__builtin_ia32_vpcmov_v2di">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v4si : + GCCBuiltin<"__builtin_ia32_vpcmov_v4si">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v8hi : + GCCBuiltin<"__builtin_ia32_vpcmov_v8hi">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v16qi : + GCCBuiltin<"__builtin_ia32_vpcmov_v16qi">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v2df : + GCCBuiltin<"__builtin_ia32_vpcmov_v2df">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v4sf : + GCCBuiltin<"__builtin_ia32_vpcmov_v4sf">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v4di_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v4di256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v8si_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v8si256">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v16hi_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v16hi256">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v32qi_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v32qi256">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + 
def int_x86_xop_vpcmov_v4df_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v4df256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_v8sf_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_v8sf256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqb : + GCCBuiltin<"__builtin_ia32_vpcomeqb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqw : + GCCBuiltin<"__builtin_ia32_vpcomeqw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqd : + GCCBuiltin<"__builtin_ia32_vpcomeqd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqq : + GCCBuiltin<"__builtin_ia32_vpcomeqq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequb : + GCCBuiltin<"__builtin_ia32_vpcomequb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequd : + GCCBuiltin<"__builtin_ia32_vpcomequd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequq : + GCCBuiltin<"__builtin_ia32_vpcomequq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequw : + GCCBuiltin<"__builtin_ia32_vpcomequw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseb : + GCCBuiltin<"__builtin_ia32_vpcomfalseb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalsed : + GCCBuiltin<"__builtin_ia32_vpcomfalsed">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseq : + GCCBuiltin<"__builtin_ia32_vpcomfalseq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + 
def int_x86_xop_vpcomfalseub : + GCCBuiltin<"__builtin_ia32_vpcomfalseub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseud : + GCCBuiltin<"__builtin_ia32_vpcomfalseud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseuq : + GCCBuiltin<"__builtin_ia32_vpcomfalseuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseuw : + GCCBuiltin<"__builtin_ia32_vpcomfalseuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalsew : + GCCBuiltin<"__builtin_ia32_vpcomfalsew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeb : + GCCBuiltin<"__builtin_ia32_vpcomgeb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomged : + GCCBuiltin<"__builtin_ia32_vpcomged">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeq : + GCCBuiltin<"__builtin_ia32_vpcomgeq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeub : + GCCBuiltin<"__builtin_ia32_vpcomgeub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeud : + GCCBuiltin<"__builtin_ia32_vpcomgeud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeuq : + GCCBuiltin<"__builtin_ia32_vpcomgeuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeuw : + GCCBuiltin<"__builtin_ia32_vpcomgeuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgew : + GCCBuiltin<"__builtin_ia32_vpcomgew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtb : + 
GCCBuiltin<"__builtin_ia32_vpcomgtb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtd : + GCCBuiltin<"__builtin_ia32_vpcomgtd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtq : + GCCBuiltin<"__builtin_ia32_vpcomgtq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtub : + GCCBuiltin<"__builtin_ia32_vpcomgtub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtud : + GCCBuiltin<"__builtin_ia32_vpcomgtud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtuq : + GCCBuiltin<"__builtin_ia32_vpcomgtuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtuw : + GCCBuiltin<"__builtin_ia32_vpcomgtuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtw : + GCCBuiltin<"__builtin_ia32_vpcomgtw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleb : + GCCBuiltin<"__builtin_ia32_vpcomleb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomled : + GCCBuiltin<"__builtin_ia32_vpcomled">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleq : + GCCBuiltin<"__builtin_ia32_vpcomleq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleub : + GCCBuiltin<"__builtin_ia32_vpcomleub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleud : + GCCBuiltin<"__builtin_ia32_vpcomleud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleuq : + GCCBuiltin<"__builtin_ia32_vpcomleuq">, + Intrinsic<[llvm_v2i64_ty], 
[llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleuw : + GCCBuiltin<"__builtin_ia32_vpcomleuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomlew : + GCCBuiltin<"__builtin_ia32_vpcomlew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltb : + GCCBuiltin<"__builtin_ia32_vpcomltb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltd : + GCCBuiltin<"__builtin_ia32_vpcomltd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltq : + GCCBuiltin<"__builtin_ia32_vpcomltq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltub : + GCCBuiltin<"__builtin_ia32_vpcomltub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltud : + GCCBuiltin<"__builtin_ia32_vpcomltud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltuq : + GCCBuiltin<"__builtin_ia32_vpcomltuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltuw : + GCCBuiltin<"__builtin_ia32_vpcomltuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltw : + GCCBuiltin<"__builtin_ia32_vpcomltw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneb : + GCCBuiltin<"__builtin_ia32_vpcomneb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomned : + GCCBuiltin<"__builtin_ia32_vpcomned">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneq : + GCCBuiltin<"__builtin_ia32_vpcomneq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneub : 
+ GCCBuiltin<"__builtin_ia32_vpcomneub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneud : + GCCBuiltin<"__builtin_ia32_vpcomneud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneuq : + GCCBuiltin<"__builtin_ia32_vpcomneuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneuw : + GCCBuiltin<"__builtin_ia32_vpcomneuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomnew : + GCCBuiltin<"__builtin_ia32_vpcomnew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueb : + GCCBuiltin<"__builtin_ia32_vpcomtrueb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrued : + GCCBuiltin<"__builtin_ia32_vpcomtrued">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueq : + GCCBuiltin<"__builtin_ia32_vpcomtrueq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueub : + GCCBuiltin<"__builtin_ia32_vpcomtrueub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueud : + GCCBuiltin<"__builtin_ia32_vpcomtrueud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueuq : + GCCBuiltin<"__builtin_ia32_vpcomtrueuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueuw : + GCCBuiltin<"__builtin_ia32_vpcomtrueuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtruew : + GCCBuiltin<"__builtin_ia32_vpcomtruew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vphaddbd : + GCCBuiltin<"__builtin_ia32_vphaddbd">, 
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddbq : + GCCBuiltin<"__builtin_ia32_vphaddbq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddbw : + GCCBuiltin<"__builtin_ia32_vphaddbw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphadddq : + GCCBuiltin<"__builtin_ia32_vphadddq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubd : + GCCBuiltin<"__builtin_ia32_vphaddubd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubq : + GCCBuiltin<"__builtin_ia32_vphaddubq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubw : + GCCBuiltin<"__builtin_ia32_vphaddubw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddudq : + GCCBuiltin<"__builtin_ia32_vphaddudq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphadduwd : + GCCBuiltin<"__builtin_ia32_vphadduwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphadduwq : + GCCBuiltin<"__builtin_ia32_vphadduwq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphaddwd : + GCCBuiltin<"__builtin_ia32_vphaddwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphaddwq : + GCCBuiltin<"__builtin_ia32_vphaddwq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphsubbw : + GCCBuiltin<"__builtin_ia32_vphsubbw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphsubdq : + GCCBuiltin<"__builtin_ia32_vphsubdq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphsubwd : + GCCBuiltin<"__builtin_ia32_vphsubwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vpmacsdd : + GCCBuiltin<"__builtin_ia32_vpmacsdd">, + Intrinsic<[llvm_v4i32_ty], + 
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsdqh : + GCCBuiltin<"__builtin_ia32_vpmacsdqh">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsdql : + GCCBuiltin<"__builtin_ia32_vpmacsdql">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdd : + GCCBuiltin<"__builtin_ia32_vpmacssdd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdqh : + GCCBuiltin<"__builtin_ia32_vpmacssdqh">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdql : + GCCBuiltin<"__builtin_ia32_vpmacssdql">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsswd : + GCCBuiltin<"__builtin_ia32_vpmacsswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssww : + GCCBuiltin<"__builtin_ia32_vpmacssww">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacswd : + GCCBuiltin<"__builtin_ia32_vpmacswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsww : + GCCBuiltin<"__builtin_ia32_vpmacsww">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpmadcsswd : + GCCBuiltin<"__builtin_ia32_vpmadcsswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmadcswd : + GCCBuiltin<"__builtin_ia32_vpmadcswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpperm : + GCCBuiltin<"__builtin_ia32_vpperm">, + Intrinsic<[llvm_v16i8_ty], + 
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vprotb : + GCCBuiltin<"__builtin_ia32_vprotb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vprotd : + GCCBuiltin<"__builtin_ia32_vprotd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vprotq : + GCCBuiltin<"__builtin_ia32_vprotq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vprotw : + GCCBuiltin<"__builtin_ia32_vprotw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpshab : + GCCBuiltin<"__builtin_ia32_vpshab">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpshad : + GCCBuiltin<"__builtin_ia32_vpshad">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpshaq : + GCCBuiltin<"__builtin_ia32_vpshaq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpshaw : + GCCBuiltin<"__builtin_ia32_vpshaw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlb : + GCCBuiltin<"__builtin_ia32_vpshlb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpshld : + GCCBuiltin<"__builtin_ia32_vpshld">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlq : + GCCBuiltin<"__builtin_ia32_vpshlq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlw : + GCCBuiltin<"__builtin_ia32_vpshlw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +//===----------------------------------------------------------------------===// // MMX // Empty MMX state op. 
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index aef2c3a..0734333 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -1,89 +1,119 @@ -//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-====// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===----------------------------------------------------------------------===// +//===-----------------------------------------------------------------------===// // // This file describes XOP (eXtended OPerations) // -//===----------------------------------------------------------------------===// +//===-----------------------------------------------------------------------===// -multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { +multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; - def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), + [(set VR128:$dst, (Int VR128:$src))]>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; + [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>; + defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>; + defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>; + defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>; + defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>; + defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", 
int_x86_xop_vphadduwq, memopv2i64>; + defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>; + defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>; + defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>; + defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>; + defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>; + defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>; + defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>; + defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>; + defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>; + defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>; + defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>; +} + +// Scalar load 2 addr operand instructions +let Constraints = "$src1 = $dst" in { +multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int, + Operand memop, ComplexPattern mem_cpat> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, + VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, + memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (Int VR128:$src1, + (bitconvert mem_cpat:$src2)))]>, VEX; } +} // Constraints = "$src1 = $dst" + let isAsmParserOnly = 1 in { - defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>; - defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>; - defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>; - defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>; - defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>; - defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>; - defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>; - defm VPHADDUDQ : xop2op<0xDB, 
"vphaddudq", f128mem>; - defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>; - defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>; - defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>; - defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>; - defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>; - defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>; - defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>; - defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; - defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>; - defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>; - defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>; -} - -multiclass xop2op256<bits<8> opc, string OpcodeStr> { + defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss, + ssmem, sse_load_f32>; + defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd, + sdmem, sse_load_f64>; +} + + +multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int, + PatFrag memop> { def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX, VEX_L; + [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L; def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; + [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX; } let isAsmParserOnly = 1 in { - defm VFRCZPS : xop2op256<0x80, "vfrczps">; - defm VFRCZPD : xop2op256<0x81, "vfrczpd">; + defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, + memopv8f32>; + defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, + memopv4f64>; } -multiclass xop3op<bits<8> opc, string OpcodeStr> { +multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4VOp3; + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3; def rm : IXOP<opc, 
MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_W; + [(set VR128:$dst, + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>, + VEX_4V, VEX_W; def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4VOp3; + [(set VR128:$dst, + (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>, + VEX_4VOp3; } let isAsmParserOnly = 1 in { - defm VPSHLW : xop3op<0x95, "vpshlw">; - defm VPSHLQ : xop3op<0x97, "vpshlq">; - defm VPSHLD : xop3op<0x96, "vpshld">; - defm VPSHLB : xop3op<0x94, "vpshlb">; - defm VPSHAW : xop3op<0x99, "vpshaw">; - defm VPSHAQ : xop3op<0x9B, "vpshaq">; - defm VPSHAD : xop3op<0x9A, "vpshad">; - defm VPSHAB : xop3op<0x98, "vpshab">; - defm VPROTW : xop3op<0x91, "vprotw">; - defm VPROTQ : xop3op<0x93, "vprotq">; - defm VPROTD : xop3op<0x92, "vprotd">; - defm VPROTB : xop3op<0x90, "vprotb">; + defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>; + defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>; + defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>; + defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>; + defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>; + defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>; + defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>; + defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>; + defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>; + defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>; + defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>; + defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>; } multiclass xop3opimm<bits<8> opc, string OpcodeStr> { @@ -105,32 +135,35 @@ let isAsmParserOnly = 1 in { } // Instruction where second source can be memory, but third must be register -multiclass xop4opm2<bits<8> opc, string OpcodeStr> { +multiclass 
xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM; def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3))]>, VEX_4V, VEX_I8IMM; } let isAsmParserOnly = 1 in { - defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">; - defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">; - defm VPMACSWW : xop4opm2<0x95, "vpmacsww">; - defm VPMACSWD : xop4opm2<0x96, "vpmacswd">; - defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">; - defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">; - defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">; - defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">; - defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">; - defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">; - defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">; - defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">; + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>; + defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>; + defm 
VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>; } // Instruction where second source can be memory, third must be imm8 @@ -204,38 +237,594 @@ let isAsmParserOnly = 1 in { defm VPCMOV : xop4op256<0xA2, "vpcmov">; } -multiclass xop5op<bits<8> opc, string OpcodeStr> { +multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128, + Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> { def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR128:$dst, + (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>; def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W, MemOp4; + [(set VR128:$dst, + (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>, + VEX_W, MemOp4; def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR128:$dst, + (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>; def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR256:$dst, + (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>; def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W, MemOp4; + [(set VR256:$dst, + (Int256 VR256:$src1, VR256:$src2, (ld_256 
addr:$src3), imm:$src4))]>, + VEX_W, MemOp4; def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR256:$dst, + (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>; } -defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">; -defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">; +defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd, + int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>; +defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps, + int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>; + +// XOP Intrinsics patterns + +// VPCOM EQ +def : Pat<(int_x86_xop_vpcomeqw VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomeqw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomequw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomequw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomequq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomequq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomequd VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomequd VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomequb VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomequb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 4))>; + +def : 
Pat<(int_x86_xop_vpcomeqq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomeqq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomeqd VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomeqd VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 4))>; + +def : Pat<(int_x86_xop_vpcomeqb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 4))>; +def : Pat<(int_x86_xop_vpcomeqb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 4))>; + +// VPCOM FALSE +def : Pat<(int_x86_xop_vpcomfalsew VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalsew VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalseuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalseuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalseud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalseub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 
6))>; + +def : Pat<(int_x86_xop_vpcomfalseq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalsed VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalsed VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 6))>; + +def : Pat<(int_x86_xop_vpcomfalseb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 6))>; +def : Pat<(int_x86_xop_vpcomfalseb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 6))>; + +// VPCOM GE +def : Pat<(int_x86_xop_vpcomgew VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgew VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomgeuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomgeuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomgeud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomgeub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 
3))>; + +def : Pat<(int_x86_xop_vpcomgeq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomged VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomged VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 3))>; + +def : Pat<(int_x86_xop_vpcomgeb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 3))>; +def : Pat<(int_x86_xop_vpcomgeb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 3))>; + +// VPCOM GT +def : Pat<(int_x86_xop_vpcomgtw VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 2))>; + +def : 
Pat<(int_x86_xop_vpcomgtq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtd VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtd VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 2))>; + +def : Pat<(int_x86_xop_vpcomgtb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 2))>; +def : Pat<(int_x86_xop_vpcomgtb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 2))>; + +// VPCOM LE +def : Pat<(int_x86_xop_vpcomlew VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomlew VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomleuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomleuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomleud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomleub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 1))>; + +def : 
Pat<(int_x86_xop_vpcomleq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomled VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomled VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 1))>; + +def : Pat<(int_x86_xop_vpcomleb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 1))>; +def : Pat<(int_x86_xop_vpcomleb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 1))>; + +// VPCOM LT +def : Pat<(int_x86_xop_vpcomltw VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 0))>; + +def : 
Pat<(int_x86_xop_vpcomltq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltd VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltd VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 0))>; + +def : Pat<(int_x86_xop_vpcomltb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 0))>; +def : Pat<(int_x86_xop_vpcomltb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 0))>; + +// VPCOM NE +def : Pat<(int_x86_xop_vpcomnew VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomnew VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomneuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomneuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomneud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomneub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 5))>; + +def : 
Pat<(int_x86_xop_vpcomneq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomned VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomned VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 5))>; + +def : Pat<(int_x86_xop_vpcomneb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 5))>; +def : Pat<(int_x86_xop_vpcomneb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 5))>; + +// VPCOM TRUE (predicate immediate 7; 6 is the FALSE predicate used above) +def : Pat<(int_x86_xop_vpcomtruew VR128:$src1, VR128:$src2), + (VPCOMWri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtruew VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMWmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrueuw VR128:$src1, VR128:$src2), + (VPCOMUWri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueuw VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUWmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrueuq VR128:$src1, VR128:$src2), + (VPCOMUQri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueuq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUQmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrueud VR128:$src1, VR128:$src2), + (VPCOMUDri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueud VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUDmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrueub VR128:$src1, VR128:$src2), + (VPCOMUBri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueub VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMUBmi VR128:$src1, addr:$src2, (i8 7))>; +
+def : Pat<(int_x86_xop_vpcomtrueq VR128:$src1, VR128:$src2), + (VPCOMQri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueq VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMQmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrued VR128:$src1, VR128:$src2), + (VPCOMDri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrued VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMDmi VR128:$src1, addr:$src2, (i8 7))>; + +def : Pat<(int_x86_xop_vpcomtrueb VR128:$src1, VR128:$src2), + (VPCOMBri VR128:$src1, VR128:$src2, (i8 7))>; +def : Pat<(int_x86_xop_vpcomtrueb VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))), + (VPCOMBmi VR128:$src1, addr:$src2, (i8 7))>; + +// VPPERM +def : Pat<(int_x86_xop_vpperm VR128:$src1, VR128:$src2, VR128:$src3), + (VPPERMrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpperm VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPPERMrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpperm VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPPERMmr VR128:$src1, addr:$src2, VR128:$src3)>; + +// VPCMOV +def : Pat<(int_x86_xop_vpcmov VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), +
VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VPCMOV di (256-bit memory forms load via memopv4i64, like int_x86_xop_vpcmov_256) +def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VPCMOV si +def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + + +// VPCMOV hi +def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr
VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VPCMOV qi +def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VPCMOV df +def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm
VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VPCMOV sf +def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1, VR128:$src2, VR128:$src3), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))), + (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1, + (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3), + (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))), + (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1, + (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3), + (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; + diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll new file mode 100644 index 0000000..ca1651e --- /dev/null +++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll @@ -0,0 +1,1059 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s + +define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
{ + ; CHECK: vpermil2pd + %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1] + ret <2 x double> %res +} +define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpermil2pd + %vec = load <2 x double>* %a1 + %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1] + ret <2 x double> %res +} +define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpermil2pd + %vec = load <2 x double>* %a2 + %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone + +define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vpermil2pd + ; CHECK: ymm + %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ; + ret <4 x double> %res +} +define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpermil2pd + ; CHECK: ymm + %vec = load <4 x double>* %a1 + %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ; + ret <4 x double> %res +} +define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpermil2pd + ; CHECK: ymm + %vec = load <4 x double>* %a2 + %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ; + ret <4 x double> %res +} +declare <4 x double> 
@llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone + +define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vpermil2ps + %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone + +define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vpermil2ps + ; CHECK: ymm + %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ; + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) { + ; CHECK: vpcmov + %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcmov_v2di(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) { + ; CHECK: vpcmov + %res = call <2 x i64> @llvm.x86.xop.vpcmov.v2di(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcmov.v2di(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcmov_v4si(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { + ; CHECK: vpcmov + %res = call <4 x i32> @llvm.x86.xop.vpcmov.v4si(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcmov.v4si(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcmov_v8hi(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) { + ; CHECK: vpcmov + %res = call <8 x i16> 
@llvm.x86.xop.vpcmov.v8hi(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcmov.v8hi(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcmov_v16qi(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpcmov + %res = call <16 x i8> @llvm.x86.xop.vpcmov.v16qi(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcmov.v16qi(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + +define <2 x double> @test_int_x86_xop_vpcmov_v2df(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vpcmov + %res = call <2 x double> @llvm.x86.xop.vpcmov.v2df(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.xop.vpcmov.v2df(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <4 x float> @test_int_x86_xop_vpcmov_v4sf(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vpcmov + %res = call <4 x float> @llvm.x86.xop.vpcmov.v4sf(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.xop.vpcmov.v4sf(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ; + ret <4 x i64> %res +} +define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpcmov + ; CHECK: ymm + %vec = load <4 x i64>* %a1 + %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ; + ret <4 x i64> %res +} +define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpcmov + ; CHECK: ymm + %vec = load <4 x i64>* %a2 + %res 
= call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone + +define <4 x i64> @test_int_x86_xop_vpcmov_v4di_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <4 x i64> @llvm.x86.xop.vpcmov.v4di.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.xop.vpcmov.v4di.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone + +define <8 x i32> @test_int_x86_xop_vpcmov_v8si_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <8 x i32> @llvm.x86.xop.vpcmov.v8si.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) ; + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.xop.vpcmov.v8si.256(<8 x i32>, <8 x i32>, <8 x i32>) nounwind readnone + +define <16 x i16> @test_int_x86_xop_vpcmov_v16hi_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <16 x i16> @llvm.x86.xop.vpcmov.v16hi.256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) ; + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.xop.vpcmov.v16hi.256(<16 x i16>, <16 x i16>, <16 x i16>) nounwind readnone + +define <32 x i8> @test_int_x86_xop_vpcmov_v32qi_256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <32 x i8> @llvm.x86.xop.vpcmov.v32qi.256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.xop.vpcmov.v32qi.256(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone + +define <4 x double> @test_int_x86_xop_vpcmov_v4df_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <4 x double> @llvm.x86.xop.vpcmov.v4df.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; + ret <4 x double> %res +} +declare <4 x double> 
@llvm.x86.xop.vpcmov.v4df.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + +define <8 x float> @test_int_x86_xop_vpcmov_v8sf_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vpcmov + ; CHECK: ymm + %res = call <8 x float> @llvm.x86.xop.vpcmov.v8sf.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.xop.vpcmov.v8sf.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK:vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) { + ; CHECK-NOT: vmovaps + ; CHECK:vpcomb + %vec = load <16 x i8>* %a1 + %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> 
@llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + 
%res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x 
i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) 
{ + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) { + ; 
CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: 
vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + 
%res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call 
<2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomb + %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomd + %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomq + %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcomub + %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcomud + 
%res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcomuq + %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomuw + %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcomw + %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) { + ; CHECK: vphaddbd + %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) { + ; CHECK: vphaddbq + %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) { + ; CHECK: vphaddbw + %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) { + ; CHECK: vphadddq + %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> 
@llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) { + ; CHECK: vphaddubd + %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) { + ; CHECK: vphaddubq + %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) { + ; CHECK: vphaddubw + %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) { + ; CHECK: vphaddudq + %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) { + ; CHECK: vphadduwd + %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) { + ; CHECK: vphadduwq + %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) { + ; CHECK: vphaddwd + %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) { + ; CHECK: vphaddwq + %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x 
i16>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) { + ; CHECK: vphsubbw + %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) { + ; CHECK: vphsubdq + %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ; + ret <2 x i64> %res +} +define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vphsubdq + %vec = load <4 x i32>* %a0 + %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) { + ; CHECK: vphsubwd + %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ; + ret <4 x i32> %res +} +define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vphsubwd + %vec = load <8 x i16>* %a0 + %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { + ; CHECK: vpmacsdd + %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { + ; CHECK: vpmacsdqh + %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { + ; CHECK: vpmacsdql + %res = call <2 x 
i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { + ; CHECK: vpmacssdd + %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { + ; CHECK: vpmacssdqh + %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { + ; CHECK: vpmacssdql + %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { + ; CHECK: vpmacsswd + %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) { + ; CHECK: vpmacssww + %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { + ; CHECK: vpmacswd + %res = call <4 x i32> 
@llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) { + ; CHECK: vpmacsww + %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { + ; CHECK: vpmadcsswd + %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { + ; CHECK: vpmadcswd + %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ; + ret <4 x i32> %res +} +define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpmadcswd + %vec = load <8 x i16>* %a1 + %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpperm + %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; + ret <16 x i8> %res +} +define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpperm + %vec = load <16 x i8>* %a2 + %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ; + ret <16 x i8> %res +} +define <16 x i8> 
@test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) { + ; CHECK-NOT: vmovaps + ; CHECK: vpperm + %vec = load <16 x i8>* %a1 + %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vprotb + %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vprotd + %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vprotq + %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vprotw + %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshab + %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpshad + %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) 
nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpshaq + %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpshaw + %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshlb + %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpshld + %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpshlq + %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpshlw + %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) { + ; CHECK-NOT: vmovaps + ; CHECK: vpshlw + %vec = load <8 x i16>* %a1 + %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ; + ret <8 x i16> %res +} +define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) { + ; CHECK-NOT: vmovaps + ; CHECK: vpshlw + %vec = load <8 x i16>* %a0 + 
%res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone + +define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK-NOT: mov + ; CHECK: vfrczss + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ; + ret <4 x float> %res +} +define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) { + ; CHECK-NOT: mov + ; CHECK: vfrczss + %elem = load float* %a1 + %vec = insertelement <4 x float> undef, float %elem, i32 0 + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone + +define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK-NOT: mov + ; CHECK: vfrczsd + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ; + ret <2 x double> %res +} +define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) { + ; CHECK-NOT: mov + ; CHECK: vfrczsd + %elem = load double* %a1 + %vec = insertelement <2 x double> undef, double %elem, i32 0 + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) { + ; CHECK: vfrczpd + %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ; + ret <2 x double> %res +} +define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vfrczpd + %vec = load <2 x double>* %a0 + %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone + +define 
<4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) { + ; CHECK: vfrczpd + ; CHECK: ymm + %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ; + ret <4 x double> %res +} +define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vfrczpd + ; CHECK: ymm + %vec = load <4 x double>* %a0 + %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ; + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone + +define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) { + ; CHECK: vfrczps + %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ; + ret <4 x float> %res +} +define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vfrczps + %vec = load <4 x float>* %a0 + %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone + +define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) { + ; CHECK: vfrczps + ; CHECK: ymm + %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ; + ret <8 x float> %res +} +define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) { + ; CHECK-NOT: vmovaps + ; CHECK: vfrczps + ; CHECK: ymm + %vec = load <8 x float>* %a0 + %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ; + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone + |