From 2fb982aa720ec1ef149b2d9add2673c313f08792 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Wed, 28 Aug 2013 11:21:58 +0000
Subject: AVX-512: added SQRT, VRSQRT14, VCOMISS, VUCOMISS, VRCP14, VPABS

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189472 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/avx512-arith.ll      | 37 +++++++++++++++
 test/CodeGen/X86/avx512-cmp.ll        | 27 +++++++++++
 test/CodeGen/X86/avx512-intrinsics.ll | 88 +++++++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+)
 create mode 100644 test/CodeGen/X86/avx512-cmp.ll
 create mode 100644 test/CodeGen/X86/avx512-intrinsics.ll

(limited to 'test')

diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index 55ce9f9..d5af76f 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -186,6 +186,43 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
   ret <16 x i32> %x
 }
 
+; CHECK-LABEL: sqrtA
+; CHECK: vsqrtssz
+; CHECK: ret
+declare float @sqrtf(float) readnone
+define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+entry:
+  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
+  ret float %conv1
+}
+
+; CHECK-LABEL: sqrtB
+; CHECK: vsqrtsdz
+; CHECK: ret
+declare double @sqrt(double) readnone
+define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+entry:
+  %call = tail call double @sqrt(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: sqrtC
+; CHECK: vsqrtssz
+; CHECK: ret
+declare float @llvm.sqrt.f32(float)
+define float @sqrtC(float %a) nounwind {
+  %b = call float @llvm.sqrt.f32(float %a)
+  ret float %b
+}
+
+; CHECK-LABEL: fadd_broadcast
+; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK: ret
+define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
+  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  ret <16 x float> %b
+}
+
 ; CHECK-LABEL: addq_broadcast
 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; CHECK: ret
diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll
new file mode 100644
index 0000000..ba52745
--- /dev/null
+++ b/test/CodeGen/X86/avx512-cmp.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK: vucomisdz
+define double @test1(double %a, double %b) nounwind {
+  %tobool = fcmp une double %a, %b
+  br i1 %tobool, label %l1, label %l2
+
+l1:
+  %c = fsub double %a, %b
+  ret double %c
+l2:
+  %c1 = fadd double %a, %b
+  ret double %c1
+}
+
+; CHECK: vucomissz
+define float @test2(float %a, float %b) nounwind {
+  %tobool = fcmp olt float %a, %b
+  br i1 %tobool, label %l1, label %l2
+
+l1:
+  %c = fsub float %a, %b
+  ret float %c
+l2:
+  %c1 = fadd float %a, %b
+  ret float %c1
+}
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
new file mode 100644
index 0000000..c0ac719
--- /dev/null
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone
+; CHECK: test_x86_avx3_kortestz
+; CHECK: kortestw
+; CHECK: sete
+define i32 @test_x86_avx3_kortestz(i16 %a0, i16 %a1) {
+  %res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone
+; CHECK: test_x86_avx3_kortestc
+; CHECK: kortestw
+; CHECK: sbbl
+define i32 @test_x86_avx3_kortestc(i16 %a0, i16 %a1) {
+  %res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1)
+  ret i32 %res
+}
+
+define <16 x float> @test_x86_avx3_rcp_ps_512(<16 x float> %a0) {
+  ; CHECK: vrcp14ps
+  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_avx3_rcp_pd_512(<8 x double> %a0) {
+  ; CHECK: vrcp14pd
+  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
+
+
+define <8 x double> @test_x86_avx3_rndscale_pd_512(<8 x double> %a0) {
+  ; CHECK: vrndscale
+  %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone
+
+
+define <16 x float> @test_x86_avx3_rndscale_ps_512(<16 x float> %a0) {
+  ; CHECK: vrndscale
+  %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone
+
+
+define <16 x float> @test_x86_avx3_rsqrt_ps_512(<16 x float> %a0) {
+  ; CHECK: vrsqrt14ps
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
+
+
+define <8 x double> @test_x86_avx3_sqrt_pd_512(<8 x double> %a0) {
+  ; CHECK: vsqrtpd
+  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
+
+
+define <16 x float> @test_x86_avx3_sqrt_ps_512(<16 x float> %a0) {
+  ; CHECK: vsqrtps
+  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
+
+define <4 x float> @test_x86_avx3_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: vsqrtssz
+  %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_x86_avx3_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: vsqrtsdz
+  %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
+
-- 
cgit v1.1