summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2016-08-02 16:01:34 -0700
committer: Eric Anholt <eric@anholt.net> 2016-08-03 10:55:54 -0700
commit: bc1fc9c98539f38f5a29b314d4a993a2e2f7ca0a (patch)
tree: 06709a819a2d87b53148f2b777afc71e656126b5
parent: e97e9e62a1de9689ce8a2a77cb9657b387a1d14b (diff)
download: external_mesa3d-bc1fc9c98539f38f5a29b314d4a993a2e2f7ca0a.zip
external_mesa3d-bc1fc9c98539f38f5a29b314d4a993a2e2f7ca0a.tar.gz
external_mesa3d-bc1fc9c98539f38f5a29b314d4a993a2e2f7ca0a.tar.bz2
vc4: Avoid generating a custom shader per level in glGenerateMipmaps().
We were baking in the LOD of the source level to each shader. Instead, pass it in as a uniform -- this requires storing it to a temp register, but that's better than compiling a ton of separate shaders:

total instructions in shared programs: 115032 -> 115036 (0.00%)
instructions in affected programs: 96 -> 100 (4.17%)
LOST: 572
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 10
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 4
-rw-r--r-- src/gallium/drivers/vc4/vc4_uniforms.c 18
3 files changed, 25 insertions, 7 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0afd8c6..28b3981 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -393,8 +393,8 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
}
}
- if (c->key->tex[unit].forced_first_level) {
- lod = qir_uniform_f(c, c->key->tex[unit].forced_first_level);
+ if (c->key->tex[unit].force_first_level) {
+ lod = qir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL, unit);
is_txl = true;
is_txb = false;
}
@@ -2353,10 +2353,8 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
key->tex[i].compare_func = sampler_state->compare_func;
key->tex[i].wrap_s = sampler_state->wrap_s;
key->tex[i].wrap_t = sampler_state->wrap_t;
- if (vc4_sampler->force_first_level) {
- key->tex[i].forced_first_level =
- sampler->u.tex.first_level;
- }
+ key->tex[i].force_first_level =
+ vc4_sampler->force_first_level;
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 81b5565..b8ded30 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -246,6 +246,8 @@ enum quniform_contents {
/** A reference to a texture config parameter 2 cubemap stride uniform */
QUNIFORM_TEXTURE_CONFIG_P2,
+ QUNIFORM_TEXTURE_FIRST_LEVEL,
+
QUNIFORM_TEXTURE_MSAA_ADDR,
QUNIFORM_UBO_ADDR,
@@ -314,7 +316,7 @@ struct vc4_key {
unsigned compare_func:3;
unsigned wrap_s:3;
unsigned wrap_t:3;
- unsigned forced_first_level:8;
+ bool force_first_level:1;
};
struct {
uint16_t msaa_width, msaa_height;
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 528f10e..e8cd153 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -72,6 +72,18 @@ write_texture_p2(struct vc4_context *vc4,
}
static void
+write_texture_first_level(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t data)
+{
+ uint32_t unit = data & 0xffff;
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+
+ cl_aligned_f(uniforms, texture->u.tex.first_level);
+}
+
+static void
write_texture_msaa_addr(struct vc4_context *vc4,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
@@ -253,6 +265,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
uinfo->data[i]);
break;
+ case QUNIFORM_TEXTURE_FIRST_LEVEL:
+ write_texture_first_level(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
case QUNIFORM_UBO_ADDR:
cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
break;
@@ -373,6 +390,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
case QUNIFORM_TEXTURE_CONFIG_P1:
case QUNIFORM_TEXTURE_CONFIG_P2:
case QUNIFORM_TEXTURE_BORDER_COLOR:
+ case QUNIFORM_TEXTURE_FIRST_LEVEL:
case QUNIFORM_TEXTURE_MSAA_ADDR:
case QUNIFORM_TEXRECT_SCALE_X:
case QUNIFORM_TEXRECT_SCALE_Y: