From 1503aba4a036f5394c7983417bc1e64613b2fc77 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky <elena.demikhovsky@intel.com>
Date: Wed, 1 Aug 2012 12:06:00 +0000
Subject: Added FMA functionality to X86 target.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161110 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/fma.ll             |  12 ++--
 test/CodeGen/X86/fma3-intrinsics.ll |  36 +++++-----
 test/CodeGen/X86/fma_patterns.ll    | 139 ++++++++++++++++++++++++++++++++++++
 3 files changed, 165 insertions(+), 22 deletions(-)
 create mode 100644 test/CodeGen/X86/fma_patterns.ll

(limited to 'test/CodeGen')

diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index 5deedb9..b0c1d0a 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+fma  | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=i386-apple-darwin10               | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10             | FileCheck %s --check-prefix=CHECK-FMA-CALL
 
 ; CHECK: test_f32
-; CHECK: _fmaf
+; CHECK-FMA-INST: vfmadd213ss
+; CHECK-FMA-CALL: _fmaf
 
 define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
 entry:
@@ -11,7 +14,8 @@ entry:
 }
 
 ; CHECK: test_f64
-; CHECK: _fma
+; CHECK-FMA-INST: vfmadd213sd
+; CHECK-FMA-CALL: _fma
 
 define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
index 8659dfe..90529e0 100755
--- a/test/CodeGen/X86/fma3-intrinsics.ll
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -1,42 +1,42 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s
 
 define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmadd132ss %xmm
+  ; CHECK: fmadd213ss %xmm
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmadd132ps
+  ; CHECK: fmadd213ps
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
-  ; CHECK: fmadd132ps {{.*\(%r.*}}, %ymm
+  ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
   ret <8 x float> %res
 }
 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmadd132ss %xmm
+  ; CHECK: fnmadd213ss %xmm
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmadd132ps
+  ; CHECK: fnmadd213ps
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
-  ; CHECK: fnmadd132ps {{.*\(%r.*}}, %ymm
+  ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
   ret <8 x float> %res
 }
@@ -44,28 +44,28 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x
 
 
 define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmsub132ss
+  ; CHECK: fmsub213ss
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmsub132ps
+  ; CHECK: fmsub213ps
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmsub132ss
+  ; CHECK: fnmsub213ss
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
 define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmsub132ps
+  ; CHECK: fnmsub213ps
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
   ret <4 x float> %res
 }
@@ -74,28 +74,28 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa
 ;;;;
 
 define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmadd132sd
+  ; CHECK: fmadd213sd
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmadd132pd
+  ; CHECK: fmadd213pd
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmadd132sd
+  ; CHECK: fnmadd213sd
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmadd132pd
+  ; CHECK: fnmadd213pd
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
@@ -104,28 +104,28 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d
 
 
 define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmsub132sd
+  ; CHECK: fmsub213sd
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmsub132pd
+  ; CHECK: fmsub213pd
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmsub132sd
+  ; CHECK: fnmsub213sd
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 
 define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmsub132pd
+  ; CHECK: fnmsub213pd
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
   ret <2 x double> %res
 }
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
new file mode 100644
index 0000000..b04663a
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+
+; CHECK: test_x86_fmadd_ps
+; CHECK: vfmadd213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; (a0 * a1) + a2 on <4 x float>
+  %prod = fmul <4 x float> %a0, %a1 ; stand-alone multiply feeding the add below
+  %sum = fadd <4 x float> %prod, %a2 ; the directives above expect mul+add to become one vfmadd213ps
+  ret <4 x float> %sum
+}
+
+; CHECK: test_x86_fmsub_ps
+; CHECK: vfmsub213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; (a0 * a1) - a2 on <4 x float>
+  %x = fmul <4 x float> %a0, %a1 ; product is the minuend of the subtraction below
+  %res = fsub <4 x float> %x, %a2 ; product minus a2; expected to fold into a single fused multiply-subtract
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps
+; CHECK: vfnmadd213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; a2 - (a0 * a1) on <4 x float>
+  %x = fmul <4 x float> %a0, %a1 ; product is the subtrahend of the subtraction below
+  %res = fsub <4 x float> %a2, %x ; a2 minus product, i.e. negated product plus a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps
+; CHECK: vfnmsub213ps     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; -(a0 * a1) - a2 on <4 x float>
+  %x = fmul <4 x float> %a0, %a1 ; plain product, negated on the next line
+  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x ; (-0.0) - x negates every lane
+  %res = fsub <4 x float> %y, %a2 ; negated product minus a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmadd_ps_y
+; CHECK: vfmadd213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; (a0 * a1) + a2 on <8 x float>
+  %prod = fmul <8 x float> %a0, %a1 ; 8-lane multiply feeding the add below
+  %sum = fadd <8 x float> %prod, %a2 ; the directives above expect the ymm form of vfmadd213ps
+  ret <8 x float> %sum
+}
+
+; CHECK: test_x86_fmsub_ps_y
+; CHECK: vfmsub213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; (a0 * a1) - a2 on <8 x float>
+  %prod = fmul <8 x float> %a0, %a1 ; product is the minuend
+  %diff = fsub <8 x float> %prod, %a2 ; product minus a2; directives above expect vfmsub213ps on ymm
+  ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fnmadd_ps_y
+; CHECK: vfnmadd213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; a2 - (a0 * a1) on <8 x float>
+  %prod = fmul <8 x float> %a0, %a1 ; product is the subtrahend
+  %diff = fsub <8 x float> %a2, %prod ; a2 minus product; directives above expect vfnmadd213ps on ymm
+  ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fnmsub_ps_y
+; CHECK: vfnmsub213ps     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; -(a0 * a1) - a2 on <8 x float>
+  %prod = fmul <8 x float> %a0, %a1 ; plain product, negated on the next line
+  %neg = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod ; (-0.0) - prod negates every lane
+  %diff = fsub <8 x float> %neg, %a2 ; negated product minus a2; directives above expect vfnmsub213ps on ymm
+  ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fmadd_pd_y
+; CHECK: vfmadd213pd     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; (a0 * a1) + a2 on <4 x double>
+  %prod = fmul <4 x double> %a0, %a1 ; 4-lane double multiply feeding the add below
+  %sum = fadd <4 x double> %prod, %a2 ; directives above expect vfmadd213pd on ymm
+  ret <4 x double> %sum
+}
+
+; CHECK: test_x86_fmsub_pd_y
+; CHECK: vfmsub213pd     %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; (a0 * a1) - a2 on <4 x double>
+  %prod = fmul <4 x double> %a0, %a1 ; product is the minuend
+  %diff = fsub <4 x double> %prod, %a2 ; product minus a2; directives above expect vfmsub213pd on ymm
+  ret <4 x double> %diff
+}
+
+; CHECK: test_x86_fmsub_pd
+; CHECK: vfmsub213pd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; (a0 * a1) - a2 on <2 x double>
+  %prod = fmul <2 x double> %a0, %a1 ; product is the minuend
+  %diff = fsub <2 x double> %prod, %a2 ; product minus a2; directives above expect vfmsub213pd on xmm
+  ret <2 x double> %diff
+}
+
+; CHECK: test_x86_fnmadd_ss
+; CHECK: vfnmadd213ss    %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { ; scalar float: a2 - (a0 * a1)
+  %prod = fmul float %a0, %a1 ; product is the subtrahend
+  %diff = fsub float %a2, %prod ; a2 minus product; directives above expect vfnmadd213ss
+  ret float %diff
+}
+
+; CHECK: test_x86_fnmadd_sd
+; CHECK: vfnmadd213sd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { ; scalar double: a2 - (a0 * a1)
+  %prod = fmul double %a0, %a1 ; product is the subtrahend
+  %diff = fsub double %a2, %prod ; a2 minus product; directives above expect vfnmadd213sd
+  ret double %diff
+}
+
+; CHECK: test_x86_fmsub_sd
+; CHECK: vfmsub213sd     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { ; scalar double: (a0 * a1) - a2
+  %prod = fmul double %a0, %a1 ; product is the minuend
+  %diff = fsub double %prod, %a2 ; product minus a2; directives above expect vfmsub213sd
+  ret double %diff
+}
+
+; CHECK: test_x86_fnmsub_ss
+; CHECK: vfnmsub213ss     %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { ; scalar float: ((-a0) * a1) - a2
+  %neg = fsub float -0.000000e+00, %a0 ; (-0.0) - a0 negates the first factor before the multiply
+  %prod = fmul float %neg, %a1 ; product of the negated factor and a1
+  %diff = fsub float %prod, %a2 ; minus a2; directives above expect vfnmsub213ss
+  ret float %diff
+}
-- 
cgit v1.1