summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jose Fonseca <jfonseca@vmware.com> 2016-05-24 22:36:28 +0900
committer: Jose Fonseca <jfonseca@vmware.com> 2016-06-10 13:47:35 +0100
commit: 9e8edfa19034ae69139ef10b88f958b4f58d57ea (patch)
tree: 03fa0133cad7b88ba0eeb364b1b969576d71935a
parent: 54f755fa0fda14c578022767bcef2f27b2e89707 (diff)
download: external_mesa3d-9e8edfa19034ae69139ef10b88f958b4f58d57ea.zip
          external_mesa3d-9e8edfa19034ae69139ef10b88f958b4f58d57ea.tar.gz
          external_mesa3d-9e8edfa19034ae69139ef10b88f958b4f58d57ea.tar.bz2
util,gallivm: Explicitly enable/disable fma attribute.
As suggested by Roland Scheidegger.

Use the same logic as f16c, since fma requires VEX encoding.

But disable FMA on LLVM 3.3 without MCJIT.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_init.c    | 2
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 9
-rw-r--r--  src/gallium/auxiliary/util/u_cpu_detect.c      | 1
-rw-r--r--  src/gallium/auxiliary/util/u_cpu_detect.h      | 1
4 files changed, 13 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index cf21ab0..22340c0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -420,6 +420,7 @@ lp_build_init(void)
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
+ util_cpu_caps.has_fma = 0;
}
#endif
@@ -454,6 +455,7 @@ lp_build_init(void)
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
+ util_cpu_caps.has_fma = 0;
}
#ifdef PIPE_ARCH_PPC_64
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 805a50b..5ffe09c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -570,6 +570,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
*/
MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
+ if (HAVE_LLVM >= 0x0304 || useMCJIT) {
+ MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma");
+ } else {
+ /*
+ * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
+ * llvm.fmuladd.v2f32 intrinsics when FMA is available.
+ */
+ MAttrs.push_back("-fma");
+ }
MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
/* disable avx512 and all subvariants */
#if HAVE_LLVM >= 0x0304
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index aa3c30a..3a51ab9 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -369,6 +369,7 @@ util_cpu_detect(void)
((regs2[2] >> 27) & 1) && // OSXSAVE
((xgetbv() & 6) == 6); // XMM & YMM
util_cpu_caps.has_f16c = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx;
+ util_cpu_caps.has_fma = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx;
util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
#if defined(PIPE_ARCH_X86_64)
util_cpu_caps.has_daz = 1;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 5ccfc93..c873232 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -66,6 +66,7 @@ struct util_cpu_caps {
unsigned has_avx:1;
unsigned has_avx2:1;
unsigned has_f16c:1;
+ unsigned has_fma:1;
unsigned has_3dnow:1;
unsigned has_3dnow_ext:1;
unsigned has_xop:1;