From 9e8edfa19034ae69139ef10b88f958b4f58d57ea Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Tue, 24 May 2016 22:36:28 +0900 Subject: util,gallivm: Explicitly enable/disable fma attribute. As suggested by Roland Scheidegger. Use the same logic as f16c, since fma requires VEX encoding. But disable FMA on LLVM 3.3 without MCJIT. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 +++++++++ src/gallium/auxiliary/util/u_cpu_detect.c | 1 + src/gallium/auxiliary/util/u_cpu_detect.h | 1 + 4 files changed, 13 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index cf21ab0..22340c0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -420,6 +420,7 @@ lp_build_init(void) util_cpu_caps.has_avx = 0; util_cpu_caps.has_avx2 = 0; util_cpu_caps.has_f16c = 0; + util_cpu_caps.has_fma = 0; } #endif @@ -454,6 +455,7 @@ lp_build_init(void) util_cpu_caps.has_avx = 0; util_cpu_caps.has_avx2 = 0; util_cpu_caps.has_f16c = 0; + util_cpu_caps.has_fma = 0; } #ifdef PIPE_ARCH_PPC_64 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 805a50b..5ffe09c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -570,6 +570,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, */ MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx"); MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c"); + if (HAVE_LLVM >= 0x0304 || useMCJIT) { + MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma"); + } else { + /* + * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and + * llvm.fmuladd.v2f32 intrinsics when FMA is available. + */ + MAttrs.push_back("-fma"); + } MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2"); /* disable avx512 and all subvariants */ #if HAVE_LLVM >= 0x0304 diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index aa3c30a..3a51ab9 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -369,6 +369,7 @@ util_cpu_detect(void) ((regs2[2] >> 27) & 1) && // OSXSAVE ((xgetbv() & 6) == 6); // XMM & YMM util_cpu_caps.has_f16c = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx; + util_cpu_caps.has_fma = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx; util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ #if defined(PIPE_ARCH_X86_64) util_cpu_caps.has_daz = 1; diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index 5ccfc93..c873232 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -66,6 +66,7 @@ struct util_cpu_caps { unsigned has_avx:1; unsigned has_avx2:1; unsigned has_f16c:1; + unsigned has_fma:1; unsigned has_3dnow:1; unsigned has_3dnow_ext:1; unsigned has_xop:1; -- cgit v1.1