gallivm: do per-pixel lod calculations for explicit lod

d3d10 requires per-pixel lod calculations for explicit lod, lod bias and explicit derivatives, and we should probably do it for OpenGL too - at least when they are used from vertex or geometry shaders (so this doesn't apply to lod bias), where this doesn't just affect neighboring pixels. Some code was already there to handle this, so fix it up and enable it. There will no doubt be a performance hit, unfortunately; we could do better if we knew we had a real vector shift instruction (with variable shift count), but this requires AVX2 on x86 (or an AMD Bulldozer family cpu). Don't do anything for lod bias and explicit derivatives yet, though no special magic should be needed for them either. Likewise, the size query is still broken just the same. v2: Use the information whether lod is a (broadcast) scalar or not. The idea would be to base this on the actual value; for now just pretend it's a scalar in fs and not a scalar otherwise (so per-pixel lod is only used in gs/vs, but the same code is generated for fs as before). Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
author: Roland Scheidegger <sroland@vmware.com> 2013-07-04 19:40:11 +0200
committer: Roland Scheidegger <sroland@vmware.com> 2013-07-04 19:42:04 +0200
commit: f3bbf65929e395360e5565d08d015977dd5b79fa (patch)
tree: 75da0af344f282ea76a965f0b9f9af466837c0a7 /src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
parent: bbd1e60198548a12be3405fc32dd39a87e8968ab (diff)
download: external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.zip
external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.gz
external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.bz2
1 files changed, 11 insertions, 15 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 104c24d..da416aa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
-                                                     bld->perquadf_bld.type, 256.0);
-      LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
+                                                     bld->levelf_bld.type, 256.0);
+      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
       struct lp_build_if_state if_ctx;
       LLVMValueRef need_lerp;
       unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
 
       /* need_lerp = lod_fpart > 0 */
-      if (num_quads == 1) {
+      if (bld->num_lods == 1) {
          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
-                                   lod_fpart, bld->perquadi_bld.zero,
+                                   lod_fpart, bld->leveli_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * lod_fpart values have same sign.
           * We can however then skip the greater than comparison.
           */
-         lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
-                                  bld->perquadi_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
+         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
+                                  bld->leveli_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1465,9 +1465,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         lp_build_mipmap_level_sizes(bld, ilevel1,
-                                     &size1,
-                                     &row_stride1_vec, &img_stride1_vec);
          if (bld->num_lods == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
@@ -1511,7 +1508,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          /* interpolate samples from the two mipmap levels */
 
-         if (num_quads == 1) {
+         if (num_quads == 1 && bld->num_lods == 1) {
             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
 
@@ -1526,17 +1523,16 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 #endif
          }
          else {
-            const unsigned num_chans_per_quad = 4 * 4;
-            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length);
+            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
 
             /* Take the LSB of lod_fpart */
             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
 
             /* Broadcast each lod weight into their respective channels */
-            assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
             for (i = 0; i < u8n_bld.type.length; ++i) {
-               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad);
+               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
             }
             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
author	Roland Scheidegger <sroland@vmware.com>	2013-07-04 19:40:11 +0200
committer	Roland Scheidegger <sroland@vmware.com>	2013-07-04 19:42:04 +0200
commit	f3bbf65929e395360e5565d08d015977dd5b79fa (patch)
tree	75da0af344f282ea76a965f0b9f9af466837c0a7 /src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
parent	bbd1e60198548a12be3405fc32dd39a87e8968ab (diff)
download	external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.zip external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.gz external_mesa3d-f3bbf65929e395360e5565d08d015977dd5b79fa.tar.bz2