summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary/util
diff options
context:
space:
mode:
author: Roland Scheidegger <sroland@vmware.com> 2013-04-03 01:06:52 +0200
committer: Roland Scheidegger <sroland@vmware.com> 2013-04-04 01:03:42 +0200
commit: 067a0ae42017f327edce7634890e699b4ec8492c (patch)
tree: 0b5306641ea0aec8efe71c58f87a456de295d756 /src/gallium/auxiliary/util
parent: 302df7cc85b0e2ce47c40048f30bd116b0d692fc (diff)
download: external_mesa3d-067a0ae42017f327edce7634890e699b4ec8492c.zip
external_mesa3d-067a0ae42017f327edce7634890e699b4ec8492c.tar.gz
external_mesa3d-067a0ae42017f327edce7634890e699b4ec8492c.tar.bz2
gallivm: use f16c hw support for float->half and half->float conversion
Should be way faster, of course, on CPUs supporting this (includes AMD Bulldozer and Jaguar cores, and Intel Ivy Bridge and up, except budget models).
Passes piglit fbo-blending-formats GL_ARB_texture_float -auto on Ivy Bridge.

Reviewed-by: Brian Paul <brianp@vmware.com>
Diffstat (limited to 'src/gallium/auxiliary/util')
-rw-r--r-- src/gallium/auxiliary/util/u_cpu_detect.c 1
-rw-r--r-- src/gallium/auxiliary/util/u_cpu_detect.h 1
2 files changed, 2 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 0328051..7e6df9d 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -279,6 +279,7 @@ util_cpu_detect(void)
util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
util_cpu_caps.has_avx = (regs2[2] >> 28) & 1;
+ util_cpu_caps.has_f16c = (regs2[2] >> 29) & 1;
util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index acac686..21c2f04 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -63,6 +63,7 @@ struct util_cpu_caps {
unsigned has_sse4_1:1;
unsigned has_sse4_2:1;
unsigned has_avx:1;
+ unsigned has_f16c:1;
unsigned has_3dnow:1;
unsigned has_3dnow_ext:1;
unsigned has_altivec:1;