author    Roland Scheidegger <sroland@vmware.com>  2016-10-18 03:37:37 +0200
committer Roland Scheidegger <sroland@vmware.com>  2016-10-19 01:44:59 +0200
commit    6f2f0daeb49e132f44ca9bf930049470a39c970f (patch)
tree      10cf69215164c1d3b58130b8d48ccbd594059615 /src/gallium/auxiliary/gallivm/lp_bld_arit.c
parent    7e1e06bc75bd9fc4a5b69c19fc140a6b4775915c (diff)
gallivm: Use native packs and unpacks for the lerps
For the texturing packs, things looked pretty terrible. For every lerp, we were repacking the values, and while those look sort of cheap with 128bit, with 256bit we end up with 2 of them instead of just 1 and, worse, 2 extracts on top. (The unpack, however, works fine with a single instruction, the vpmovzxbw, albeit only with llvm 3.8.)

Ideally we'd use a more clever pack for the llvmpipe backend conversion too, since we actually use the "wrong" shuffle (which is more work) when doing the fs twiddle, just so we end up with the wrong order for being able to do a native pack when converting from 2x8f -> 1x16b. But this requires some refactoring, since the untwiddle is separate from the conversion.

This is only used for avx2 256bit pack/unpack for now.

Improves openarena scores by 8% or so, though overall it's still pretty disappointing how much faster 256bit vectors are even with avx2 (or rather, aren't...). And, of course, eliminating the needless packs/unpacks in the first place would eliminate most (though not quite all) of that advantage from this patch.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
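To see why pairing the _native unpack with the _native pack avoids the extra shuffles, consider how the underlying AVX2 instructions behave: vpunpcklbw/vpunpckhbw and vpackuswb all operate within each 128-bit lane. A per-lane unpack followed by a per-lane pack is therefore an exact round trip, with no vpermq or vextracti128 needed to repair element order. The following standalone sketch (written for this note as an illustration, not code from the patch) demonstrates that on raw intrinsics; build with -mavx2:

#include <immintrin.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   unsigned char src[32], dst[32];
   for (int i = 0; i < 32; i++)
      src[i] = (unsigned char)i;

   __m256i v = _mm256_loadu_si256((const __m256i *)src);
   __m256i zero = _mm256_setzero_si256();

   /* "native" unpack: per-128-bit-lane, so lo/hi hold an interleaved
    * (non-logical) element order: lo = bytes 0-7,16-23, hi = 8-15,24-31 */
   __m256i lo = _mm256_unpacklo_epi8(v, zero);
   __m256i hi = _mm256_unpackhi_epi8(v, zero);

   /* "native" pack: vpackuswb is per-lane too, hence the exact inverse
    * of the per-lane unpack, with no cross-lane fixup required */
   __m256i packed = _mm256_packus_epi16(lo, hi);
   _mm256_storeu_si256((__m256i *)dst, packed);

   printf("%s\n", memcmp(src, dst, 32) == 0 ? "round-trip OK" : "mismatch");
   return 0;
}

The generic lp_build_unpack2/lp_build_pack2 instead keep logical element order across the full 256-bit vector, which is what costs the extra pack and the two extracts the message complains about; since lp_build_mul and lp_build_lerp consume their own unpacked halves, the intermediate order doesn't matter and the fixups can be dropped.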
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_arit.c | 14
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5cacc4..3ea0734 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1046,14 +1046,14 @@ lp_build_mul(struct lp_build_context *bld,
       struct lp_type wide_type = lp_wider_type(type);
       LLVMValueRef al, ah, bl, bh, abl, abh, ab;
 
-      lp_build_unpack2(bld->gallivm, type, wide_type, a, &al, &ah);
-      lp_build_unpack2(bld->gallivm, type, wide_type, b, &bl, &bh);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, b, &bl, &bh);
 
       /* PMULLW, PSRLW, PADDW */
       abl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
       abh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
 
-      ab = lp_build_pack2(bld->gallivm, wide_type, type, abl, abh);
+      ab = lp_build_pack2_native(bld->gallivm, wide_type, type, abl, abh);
 
       return ab;
    }
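The PMULLW/PSRLW/PADDW comment above refers to lp_build_mul_norm's normalized multiply on the widened 16-bit lanes. As a hedged scalar sketch of one standard mul/shift/add formulation of a*b/255 (an assumption for illustration; the vector code's exact rounding steps may differ):

/* Illustrative scalar sketch, not Mesa code: exact round(a*b/255)
 * for 8-bit normalized values using only mul, shift and add. */
static unsigned char
mul_norm_u8(unsigned char a, unsigned char b)
{
   unsigned t = (unsigned)a * b + 0x80;          /* multiply + rounding bias */
   return (unsigned char)((t + (t >> 8)) >> 8);  /* shift, add, shift */
}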
@@ -1350,9 +1350,9 @@ lp_build_lerp(struct lp_build_context *bld,
       lp_build_context_init(&wide_bld, bld->gallivm, wide_type);
 
-      lp_build_unpack2(bld->gallivm, type, wide_type, x, &xl, &xh);
-      lp_build_unpack2(bld->gallivm, type, wide_type, v0, &v0l, &v0h);
-      lp_build_unpack2(bld->gallivm, type, wide_type, v1, &v1l, &v1h);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, x, &xl, &xh);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, v0, &v0l, &v0h);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, v1, &v1l, &v1h);
 
       /*
        * Lerp both halves.
@@ -1363,7 +1363,7 @@
       resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, flags);
       resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, flags);
 
-      res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh);
+      res = lp_build_pack2_native(bld->gallivm, wide_type, type, resl, resh);
    } else {
       res = lp_build_lerp_simple(bld, x, v0, v1, flags);
    }
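Both hunks follow the same shape: widen the 8-bit normalized values to 16 bits, do the arithmetic there, then narrow back, with the _native variants merely agreeing on a per-lane element order in between. Reduced to scalar code (a sketch under that reading, with lp_build_lerp_simple's exact rounding elided):

/* Illustrative scalar sketch, not Mesa code. */
static unsigned char
lerp_u8(unsigned char x, unsigned char v0, unsigned char v1)
{
   int delta = (int)v1 - (int)v0;          /* widen, cf. lp_build_unpack2_native */
   int res = (int)v0 + (x * delta) / 255;  /* cf. lp_build_lerp_simple; the SIMD
                                            * path uses mul/shift/add instead */
   return (unsigned char)res;              /* narrow, cf. lp_build_pack2_native */
}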