author    Roland Scheidegger <sroland@vmware.com>  2016-10-18 03:37:37 +0200
committer Roland Scheidegger <sroland@vmware.com>  2016-10-19 01:44:59 +0200
commit    6f2f0daeb49e132f44ca9bf930049470a39c970f (patch)
tree      10cf69215164c1d3b58130b8d48ccbd594059615 /src/gallium/auxiliary/gallivm/lp_bld_arit.c
parent    7e1e06bc75bd9fc4a5b69c19fc140a6b4775915c (diff)
gallivm: Use native packs and unpacks for the lerps
For the texturing packs, things looked pretty terrible. For every lerp, we were repacking the values, and while those look sort of cheap with 128bit, with 256bit we end up with 2 of them instead of just 1 and, worse, 2 extracts on top. (The unpack, however, works fine with a single instruction, the vpmovzxbw, albeit only with llvm 3.8.)

Ideally we'd use a more clever pack for the llvmpipe backend conversion too, since we actually use the "wrong" shuffle (which is more work) when doing the fs twiddle, just so we end up with the wrong order for being able to do a native pack when converting from 2x8f -> 1x16b. But this requires some refactoring, since the untwiddle is separate from the conversion.

This is only used for avx2 256bit pack/unpack for now.

Improves openarena scores by 8% or so, though overall it's still pretty disappointing how much faster 256bit vectors are even with avx2 (or rather, aren't...). And, of course, eliminating the needless packs/unpacks in the first place would eliminate most (though not quite all) of that advantage from this patch.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
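To see why pairing the _native unpack with the _native pack avoids the extra shuffles, consider how the underlying AVX2 instructions behave: vpunpcklbw/vpunpckhbw and vpackuswb all operate within each 128-bit lane. A per-lane unpack followed by a per-lane pack is therefore an exact round trip, with no vpermq or vextracti128 needed to repair element order. The following standalone sketch (written for this note as an illustration, not code from the patch) demonstrates that on raw intrinsics; build with -mavx2:

#include <immintrin.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   unsigned char src[32], dst[32];
   for (int i = 0; i < 32; i++)
      src[i] = (unsigned char)i;

   __m256i v = _mm256_loadu_si256((const __m256i *)src);
   __m256i zero = _mm256_setzero_si256();

   /* "native" unpack: per-128-bit-lane, so lo/hi hold an interleaved
    * (non-logical) element order: lo = bytes 0-7,16-23, hi = 8-15,24-31 */
   __m256i lo = _mm256_unpacklo_epi8(v, zero);
   __m256i hi = _mm256_unpackhi_epi8(v, zero);

   /* "native" pack: vpackuswb is per-lane too, hence the exact inverse
    * of the per-lane unpack, with no cross-lane fixup required */
   __m256i packed = _mm256_packus_epi16(lo, hi);
   _mm256_storeu_si256((__m256i *)dst, packed);

   printf("%s\n", memcmp(src, dst, 32) == 0 ? "round-trip OK" : "mismatch");
   return 0;
}

The generic lp_build_unpack2/lp_build_pack2 instead keep logical element order across the full 256-bit vector, which is what costs the extra pack and the two extracts the message complains about; since lp_build_mul and lp_build_lerp consume their own unpacked halves, the intermediate order doesn't matter and the fixups can be dropped.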
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_arit.c | 14
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5cacc4..3ea0734 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1046,14 +1046,14 @@ lp_build_mul(struct lp_build_context *bld,
       struct lp_type wide_type = lp_wider_type(type);
       LLVMValueRef al, ah, bl, bh, abl, abh, ab;
 
-      lp_build_unpack2(bld->gallivm, type, wide_type, a, &al, &ah);
-      lp_build_unpack2(bld->gallivm, type, wide_type, b, &bl, &bh);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, b, &bl, &bh);
 
       /* PMULLW, PSRLW, PADDW */
       abl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
       abh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
 
-      ab = lp_build_pack2(bld->gallivm, wide_type, type, abl, abh);
+      ab = lp_build_pack2_native(bld->gallivm, wide_type, type, abl, abh);
 
       return ab;
    }
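The PMULLW/PSRLW/PADDW comment above refers to lp_build_mul_norm's normalized multiply on the widened 16-bit lanes. As a hedged scalar sketch of one standard mul/shift/add formulation of a*b/255 (an assumption for illustration; the vector code's exact rounding steps may differ):

/* Illustrative scalar sketch, not Mesa code: exact round(a*b/255)
 * for 8-bit normalized values using only mul, shift and add. */
static unsigned char
mul_norm_u8(unsigned char a, unsigned char b)
{
   unsigned t = (unsigned)a * b + 0x80;          /* multiply + rounding bias */
   return (unsigned char)((t + (t >> 8)) >> 8);  /* shift, add, shift */
}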
@@ -1350,9 +1350,9 @@ lp_build_lerp(struct lp_build_context *bld,
       lp_build_context_init(&wide_bld, bld->gallivm, wide_type);
 
-      lp_build_unpack2(bld->gallivm, type, wide_type, x, &xl, &xh);
-      lp_build_unpack2(bld->gallivm, type, wide_type, v0, &v0l, &v0h);
-      lp_build_unpack2(bld->gallivm, type, wide_type, v1, &v1l, &v1h);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, x, &xl, &xh);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, v0, &v0l, &v0h);
+      lp_build_unpack2_native(bld->gallivm, type, wide_type, v1, &v1l, &v1h);
 
       /*
        * Lerp both halves.
@@ -1363,7 +1363,7 @@
       resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, flags);
       resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, flags);
 
-      res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh);
+      res = lp_build_pack2_native(bld->gallivm, wide_type, type, resl, resh);
    } else {
       res = lp_build_lerp_simple(bld, x, v0, v1, flags);
    }
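Both hunks follow the same shape: widen the 8-bit normalized values to 16 bits, do the arithmetic there, then narrow back, with the _native variants merely agreeing on a per-lane element order in between. Reduced to scalar code (a sketch under that reading, with lp_build_lerp_simple's exact rounding elided):

/* Illustrative scalar sketch, not Mesa code. */
static unsigned char
lerp_u8(unsigned char x, unsigned char v0, unsigned char v1)
{
   int delta = (int)v1 - (int)v0;          /* widen, cf. lp_build_unpack2_native */
   int res = (int)v0 + (x * delta) / 255;  /* cf. lp_build_lerp_simple; the SIMD
                                            * path uses mul/shift/add instead */
   return (unsigned char)res;              /* narrow, cf. lp_build_pack2_native */
}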