summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-06-30 10:57:34 +0200
committerMarek Olšák <marek.olsak@amd.com>2016-07-05 00:47:12 +0200
commit476e9cee1d0cbe321c401277214e6c36ce5b18c9 (patch)
tree647dae44f3b5b7f0caa30890574caf7017ee1df2 /src/gallium
parenta675c6a00036e1718230359ff0407592ef10db9a (diff)
downloadexternal_mesa3d-476e9cee1d0cbe321c401277214e6c36ce5b18c9.zip
external_mesa3d-476e9cee1d0cbe321c401277214e6c36ce5b18c9.tar.gz
external_mesa3d-476e9cee1d0cbe321c401277214e6c36ce5b18c9.tar.bz2
radeonsi: compute only one set of interpolation (i,j) when MSAA is disabled
This should increase the PS launch rate for shaders using at least 2 pairs of perspective (i,j) and same for linear. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c74
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c13
3 files changed, 88 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index da4a6cb..a59c28e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1300,6 +1300,20 @@ static unsigned select_interp_param(struct si_shader_context *ctx,
return SI_PARAM_LINEAR_SAMPLE;
}
}
+ if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
+ switch (param) {
+ case SI_PARAM_PERSP_CENTROID:
+ case SI_PARAM_PERSP_SAMPLE:
+ return SI_PARAM_PERSP_CENTER;
+ }
+ }
+ if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
+ switch (param) {
+ case SI_PARAM_LINEAR_CENTROID:
+ case SI_PARAM_LINEAR_SAMPLE:
+ return SI_PARAM_LINEAR_CENTER;
+ }
+ }
return param;
}
@@ -6382,6 +6396,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
fprintf(f, " prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
fprintf(f, " prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
+ fprintf(f, " prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
+ fprintf(f, " prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
@@ -7255,6 +7271,40 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
linear_sample[i], base + 10 + i, "");
}
+ /* Force center interpolation. */
+ if (key->ps_prolog.states.force_persp_center_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef persp_center[2];
+
+ /* Read PERSP_CENTER. */
+ for (i = 0; i < 2; i++)
+ persp_center[i] = LLVMGetParam(func, base + 2 + i);
+ /* Overwrite PERSP_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_center[i], base + i, "");
+ /* Overwrite PERSP_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_center[i], base + 4 + i, "");
+ }
+ if (key->ps_prolog.states.force_linear_center_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef linear_center[2];
+
+ /* Read LINEAR_CENTER. */
+ for (i = 0; i < 2; i++)
+ linear_center[i] = LLVMGetParam(func, base + 8 + i);
+ /* Overwrite LINEAR_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_center[i], base + 6 + i, "");
+ /* Overwrite LINEAR_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_center[i], base + 10 + i, "");
+ }
+
/* Tell LLVM to insert WQM instruction sequence when needed. */
if (key->ps_prolog.wqm) {
LLVMAddTargetDependentFunctionAttr(func,
@@ -7414,7 +7464,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
prolog_key.ps_prolog.wqm = info->uses_derivatives &&
(prolog_key.ps_prolog.colors_read ||
prolog_key.ps_prolog.states.force_persp_sample_interp ||
- prolog_key.ps_prolog.states.force_linear_sample_interp);
+ prolog_key.ps_prolog.states.force_linear_sample_interp ||
+ prolog_key.ps_prolog.states.force_persp_center_interp ||
+ prolog_key.ps_prolog.states.force_linear_center_interp);
if (info->colors_read) {
unsigned *color = shader->selector->color_attr_index;
@@ -7443,6 +7495,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
/* Force the interpolation location for colors here. */
if (shader->key.ps.prolog.force_persp_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_persp_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7468,6 +7522,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
/* Force the interpolation location for colors here. */
if (shader->key.ps.prolog.force_linear_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_linear_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7499,6 +7555,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
if (prolog_key.ps_prolog.colors_read ||
prolog_key.ps_prolog.states.force_persp_sample_interp ||
prolog_key.ps_prolog.states.force_linear_sample_interp ||
+ prolog_key.ps_prolog.states.force_persp_center_interp ||
+ prolog_key.ps_prolog.states.force_linear_center_interp ||
prolog_key.ps_prolog.states.poly_stipple) {
shader->prolog =
si_get_shader_part(sscreen, &sscreen->ps_prologs,
@@ -7544,6 +7602,20 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
}
+ if (shader->key.ps.prolog.force_persp_center_interp &&
+ (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+ }
+ if (shader->key.ps.prolog.force_linear_center_interp &&
+ (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+ }
/* POS_W_FLOAT requires that one of the perspective weights is enabled. */
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6c2e832..0647736 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -315,9 +315,9 @@ struct si_ps_prolog_bits {
unsigned poly_stipple:1;
unsigned force_persp_sample_interp:1;
unsigned force_linear_sample_interp:1;
+ unsigned force_persp_center_interp:1;
+ unsigned force_linear_center_interp:1;
/* TODO:
- * - add force_center_interp if MSAA is disabled and centroid or
- * sample are present
* - add force_center_interp_bc_optimize to force center interpolation
* based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
* present and sample isn't present.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index cf5c1f9..d679825 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -946,6 +946,19 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->ps.prolog.force_linear_sample_interp =
sel->info.uses_linear_center ||
sel->info.uses_linear_centroid;
+ } else if (!rs->multisample_enable ||
+ sctx->framebuffer.nr_samples <= 1) {
+ /* Make sure SPI doesn't compute more than 1 pair
+ * of (i,j), which is the optimization here. */
+ key->ps.prolog.force_persp_center_interp =
+ sel->info.uses_persp_center +
+ sel->info.uses_persp_centroid +
+ sel->info.uses_persp_sample > 1;
+
+ key->ps.prolog.force_linear_center_interp =
+ sel->info.uses_linear_center +
+ sel->info.uses_linear_centroid +
+ sel->info.uses_linear_sample > 1;
}
}