diff options
author | Matt Turner <mattst88@gmail.com> | 2015-06-29 09:38:34 -0700 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2015-08-04 10:33:13 -0700 |
commit | 680de24545d23d0c2b699020267ca484f81a04a9 (patch) | |
tree | fc35bb29a5935960bff0ec0ffc5e676c69b378ca /src/util | |
parent | 3c050222b0d5b47c885ca72f3c7af22c0d28b5ad (diff) | |
download | external_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.zip external_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.tar.gz external_mesa3d-680de24545d23d0c2b699020267ca484f81a04a9.tar.bz2 |
util: Use SSE intrinsics in _mesa_lroundeven{f,}.
gcc actually generates this for us now that we use -fno-math-errno
(which is weird, since lrintf()/lrint() don't set errno) but clang still
does not. Presumably helps MSVC as well.
Reduced .text size by 8.5k with gcc before -fno-math-errno.
   text    data     bss     dec     hex filename
4935850  195136   26192 5157178  4eb13a i965_dri.so before
4927225  195128   26192 5148545  4e8f81 i965_dri.so after
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/rounding.h | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/src/util/rounding.h b/src/util/rounding.h
index 088cf86..b0c9918 100644
--- a/src/util/rounding.h
+++ b/src/util/rounding.h
@@ -25,6 +25,12 @@
 #define _ROUNDING_H
 
 #include <math.h>
+#include <limits.h>
+
+#ifdef __x86_64__
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#endif
 
 #ifdef __SSE4_1__
 #include <smmintrin.h>
@@ -87,7 +93,15 @@ _mesa_roundeven(double x)
 static inline long
 _mesa_lroundevenf(float x)
 {
+#ifdef __x86_64__
+#if LONG_BIT == 64
+   return _mm_cvtss_si64(_mm_load_ss(&x));
+#elif LONG_BIT == 32
+   return _mm_cvtss_si32(_mm_load_ss(&x));
+#endif
+#else
    return lrintf(x);
+#endif
 }
 
 /**
@@ -97,7 +111,15 @@ _mesa_lroundevenf(float x)
 static inline long
 _mesa_lroundeven(double x)
 {
+#ifdef __x86_64__
+#if LONG_BIT == 64
+   return _mm_cvtsd_si64(_mm_load_sd(&x));
+#elif LONG_BIT == 32
+   return _mm_cvtsd_si32(_mm_load_sd(&x));
+#endif
+#else
    return lrint(x);
+#endif
 }
 
 #endif