diff options
Diffstat (limited to 'renderscript/clang-include/avx2intrin.h')
-rw-r--r-- | renderscript/clang-include/avx2intrin.h | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/renderscript/clang-include/avx2intrin.h b/renderscript/clang-include/avx2intrin.h index 394fdfe..7485bdd 100644 --- a/renderscript/clang-include/avx2intrin.h +++ b/renderscript/clang-include/avx2intrin.h @@ -771,7 +771,7 @@ _mm256_broadcastsd_pd(__m128d __X) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_broadcastsi128_si256(__m128i __X) { - return (__m256i)__builtin_ia32_vbroadcastsi256(__X); + return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1); } #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ @@ -874,14 +874,21 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b) __m256i __V2 = (V2); \ (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) -#define _mm256_extracti128_si256(A, O) __extension__ ({ \ - __m256i __A = (A); \ - (__m128i)__builtin_ia32_extract128i256(__A, (O)); }) - -#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \ - __m256i __V1 = (V1); \ - __m128i __V2 = (V2); \ - (__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); }) +#define _mm256_extracti128_si256(V, M) __extension__ ({ \ + (__m128i)__builtin_shufflevector( \ + (__v4di)(V), \ + (__v4di)(_mm256_setzero_si256()), \ + (((M) & 1) ? 2 : 0), \ + (((M) & 1) ? 3 : 1) );}) + +#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \ + (__m256i)__builtin_shufflevector( \ + (__v4di)(V1), \ + (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ + (((M) & 1) ? 0 : 4), \ + (((M) & 1) ? 1 : 5), \ + (((M) & 1) ? 4 : 2), \ + (((M) & 1) ? 5 : 3) );}) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_maskload_epi32(int const *__X, __m256i __M) |