diff options
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/extended-fma-contraction.ll | 22 | ||||
-rw-r--r-- | test/CodeGen/X86/fma_patterns_wide.ll | 84 | ||||
-rw-r--r-- | test/CodeGen/X86/wide-fma-contraction.ll | 14 |
3 files changed, 117 insertions, 3 deletions
Patch summary (free text ahead of the first "diff --git" header; not part of any hunk):
  * extended-fma-contraction.ll (new): calls llvm.fmuladd on <3 x float>. With -mattr=+fma4 it
    expects vfmaddps and no separate vmulps/vaddps; the CHECK-NOFMA run expects vmulps + vaddps.
  * fma_patterns_wide.ll (new): fmul followed by fadd/fsub on <16 x float> and <8 x double> under
    -fp-contract=fast. Each function expects the fused instruction twice (CHECK: *213ps/pd forms,
    CHECK_FMA4: FMA4 forms).
  * wide-fma-contraction.ll: adds a CHECK-NOFMA run and matching checks; drops three trailing
    blank lines.
NOTE(review): the CHECK-NOFMA runs use -mtriple=x86_64-apple-darwin but guard with
  "CHECK-NOFMA-NOT: calll"; a 64-bit target presumably prints calls as "callq", which would make
  that guard vacuous -- confirm.
NOTE(review): the contraction-test RUN lines pass no -mcpu/+avx, yet the CHECK-NOFMA runs expect
  the VEX-encoded "vmulps"/"vaddps"; this looks dependent on default or host CPU features -- confirm.
NOTE(review): test_x86_fnmsub_ps_y_wide carries no CHECK_FMA4 lines, unlike its sibling functions.

diff --git a/test/CodeGen/X86/extended-fma-contraction.ll b/test/CodeGen/X86/extended-fma-contraction.ll
new file mode 100644
index 0000000..ef2c22b
--- /dev/null
+++ b/test/CodeGen/X86/extended-fma-contraction.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
+
+; CHECK: fmafunc
+define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
+
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
+  %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
+  ret <3 x float> %ret
+}
+
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll
new file mode 100644
index 0000000..d84e5a0
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns_wide.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
+
+; CHECK: test_x86_fmadd_ps_y_wide
+; CHECK: vfmadd213ps
+; CHECK: vfmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps_y_wide
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fadd <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps_y_wide
+; CHECK: vfmsub213ps
+; CHECK: vfmsub213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps_y_wide
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps_y_wide
+; CHECK: vfnmadd213ps
+; CHECK: vfnmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps_y_wide
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %a2, %x
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps_y_wide
+; CHECK: vfnmsub213ps
+; CHECK: vfnmsub213ps
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <16 x float> %y, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmadd_pd_y_wide
+; CHECK: vfmadd213pd
+; CHECK: vfmadd213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_pd_y_wide
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fadd <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd_y_wide
+; CHECK: vfmsub213pd
+; CHECK: vfmsub213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd_y_wide
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fsub <8 x double> %x, %a2
+  ret <8 x double> %res
+}
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll
index d93f33b..7ee0fba 100644
--- a/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -1,7 +1,9 @@
 ; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
 
 ; CHECK: fmafunc
 define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
 ; CHECK: vfmaddps
@@ -10,11 +12,17 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
 ; CHECK: vfmaddps
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
   %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
   ret <16 x float> %ret
 }
 
 declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
-
-
-