summaryrefslogtreecommitdiffstats
path: root/src/util
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2015-06-29 09:38:34 -0700
committerMatt Turner <mattst88@gmail.com>2015-08-04 10:33:13 -0700
commit680de24545d23d0c2b699020267ca484f81a04a9 (patch)
treefc35bb29a5935960bff0ec0ffc5e676c69b378ca /src/util
parent3c050222b0d5b47c885ca72f3c7af22c0d28b5ad (diff)
downloadexternal_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.zip
external_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.tar.gz
external_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.tar.bz2
util: Use SSE intrinsics in _mesa_lroundeven{f,}.
gcc actually generates this for us now that we use -fno-math-errno (which
is weird, since lrintf()/lrint() don't set errno), but clang still does
not. Presumably helps MSVC as well.

Reduced .text size by 8.5k with gcc before -fno-math-errno.

   text     data    bss      dec     hex  filename
4935850   195136  26192  5157178  4eb13a  i965_dri.so before
4927225   195128  26192  5148545  4e8f81  i965_dri.so after

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Diffstat (limited to 'src/util')
-rw-r--r--src/util/rounding.h22
1 files changed, 22 insertions, 0 deletions
diff --git a/src/util/rounding.h b/src/util/rounding.h
index 088cf86..b0c9918 100644
--- a/src/util/rounding.h
+++ b/src/util/rounding.h
@@ -25,6 +25,12 @@
#define _ROUNDING_H
#include <math.h>
+#include <limits.h>
+
+#ifdef __x86_64__
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
@@ -87,7 +93,15 @@ _mesa_roundeven(double x)
static inline long
_mesa_lroundevenf(float x)
{
   /* Convert a float to long using the current rounding mode (round to
    * nearest, ties to even, unless the caller changed it).
    *
    * On x86-64 the SSE scalar conversion is used directly.  The width of
    * long is tested with LONG_MAX rather than LONG_BIT: LONG_BIT is an XSI
    * extension that <limits.h> need not define under strict ISO C, and an
    * undefined macro evaluates to 0 in #if, which would leave this function
    * without any return statement.
    */
#ifdef __x86_64__
#if LONG_MAX == 0x7fffffffffffffffLL
   /* LP64: long is 64 bits wide. */
   return _mm_cvtss_si64(_mm_load_ss(&x));
#elif LONG_MAX == 0x7fffffffL
   /* 32-bit long on an x86-64 CPU (e.g. the x32 ABI). */
   return _mm_cvtss_si32(_mm_load_ss(&x));
#else
#error "Unsupported width of long"
#endif
#else
   return lrintf(x);
#endif
}
/**
@@ -97,7 +111,15 @@ _mesa_lroundevenf(float x)
static inline long
_mesa_lroundeven(double x)
{
   /* Convert a double to long using the current rounding mode (round to
    * nearest, ties to even, unless the caller changed it).
    *
    * On x86-64 the SSE2 scalar conversion is used directly.  The width of
    * long is tested with LONG_MAX rather than LONG_BIT: LONG_BIT is an XSI
    * extension that <limits.h> need not define under strict ISO C, and an
    * undefined macro evaluates to 0 in #if, which would leave this function
    * without any return statement.
    */
#ifdef __x86_64__
#if LONG_MAX == 0x7fffffffffffffffLL
   /* LP64: long is 64 bits wide. */
   return _mm_cvtsd_si64(_mm_load_sd(&x));
#elif LONG_MAX == 0x7fffffffL
   /* 32-bit long on an x86-64 CPU (e.g. the x32 ABI). */
   return _mm_cvtsd_si32(_mm_load_sd(&x));
#else
#error "Unsupported width of long"
#endif
#else
   return lrint(x);
#endif
}
#endif