diff options
author | Roland Scheidegger <sroland@vmware.com> | 2013-07-04 19:40:11 +0200 |
---|---|---|
committer | Roland Scheidegger <sroland@vmware.com> | 2013-07-04 19:42:04 +0200 |
commit | f3bbf65929e395360e5565d08d015977dd5b79fa (patch) | |
tree | 75da0af344f282ea76a965f0b9f9af466837c0a7 /src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | |
parent | bbd1e60198548a12be3405fc32dd39a87e8968ab (diff) | |
download | external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.zip external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.gz external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.bz2 |
gallivm: do per-pixel lod calculations for explicit lod
d3d10 requires per-pixel lod calculations for explicit lod, lod bias and
explicit derivatives, and we should probably do it for OpenGL too - at least
if they are used from vertex or geometry shaders (so this doesn't apply to lod
bias), as there this doesn't just affect neighboring pixels.
Some code was already there to handle this so fix it up and enable it.
There will no doubt be a performance hit, unfortunately. We could do better
if we knew we had a real vector shift instruction (with variable shift
count), but this requires AVX2 on x86 (or an AMD Bulldozer family cpu).
Don't do anything for lod bias and explicit derivatives yet, though
no special magic should be needed for them either.
Likewise, the size query is still broken just the same.
v2: Use information if lod is a (broadcast) scalar or not. The idea would be
to base this on the actual value, for now just pretend it's a scalar in fs
and not a scalar otherwise (so, per-pixel lod is only used in gs/vs but same
code is generated for fs as before).
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 26 |
1 files changed, 11 insertions, 15 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 104c24d..da416aa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm, - bld->perquadf_bld.type, 256.0); - LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type); + bld->levelf_bld.type, 256.0); + LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type; struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; unsigned num_quads = bld->coord_bld.type.length / 4; @@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16"); /* need_lerp = lod_fpart > 0 */ - if (num_quads == 1) { + if (bld->num_lods == 1) { need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, - lod_fpart, bld->perquadi_bld.zero, + lod_fpart, bld->leveli_bld.zero, "need_lerp"); } else { @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, * lod_fpart values have same sign. * We can however then skip the greater than comparison. 
*/ - lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart, - bld->perquadi_bld.zero); - need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart); + lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart, + bld->leveli_bld.zero); + need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart); } lp_build_if(&if_ctx, bld->gallivm, need_lerp); @@ -1465,9 +1465,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_mipmap_level_sizes(bld, ilevel1, &size1, &row_stride1_vec, &img_stride1_vec); - lp_build_mipmap_level_sizes(bld, ilevel1, - &size1, - &row_stride1_vec, &img_stride1_vec); if (bld->num_lods == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); } @@ -1511,7 +1508,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* interpolate samples from the two mipmap levels */ - if (num_quads == 1) { + if (num_quads == 1 && bld->num_lods == 1) { lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, ""); lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart); @@ -1526,17 +1523,16 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, #endif } else { - const unsigned num_chans_per_quad = 4 * 4; - LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length); + unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods; + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length); LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH]; /* Take the LSB of lod_fpart */ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, ""); /* Broadcast each lod weight into their respective channels */ - assert(u8n_bld.type.length == num_quads * num_chans_per_quad); for (i = 0; i < u8n_bld.type.length; ++i) { - shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad); + shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod); } lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, 
LLVMGetUndef(tmp_vec_type), LLVMConstVector(shuffle, u8n_bld.type.length), ""); |