summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2016-05-09 17:48:24 -0700
committerJason Ekstrand <jason.ekstrand@intel.com>2016-05-14 13:34:05 -0700
commit712a980adde0b14eee8b4accd02af9b9740091a2 (patch)
tree7bb3fbc03b59eb51c491013c8bcf29bd2b7e0f8b /src/mesa
parenta2f50d87b6e9d07c6974ef309cc99acf56b2dc09 (diff)
downloadexternal_mesa3d-712a980adde0b14eee8b4accd02af9b9740091a2.zip
external_mesa3d-712a980adde0b14eee8b4accd02af9b9740091a2.tar.gz
external_mesa3d-712a980adde0b14eee8b4accd02af9b9740091a2.tar.bz2
i965/fs: Rework the persample shading key/prog_data bits
This commit reworks and simplifies the way we handle persample shading in the shader key and prog_data. The previous approach had three different key bits that had slightly different and hard-to-decern meanings while the new bits are far more clear. This commit changes it to two easily understood bits that communicate everything we need: 1) key->persample_interp: means that the user has requested persample interpolation through the API. This is equivalent to having SAMPLE_SHADING enabled and having MIN_SAMPLE_SHADING_VALUE set high enough that you actually get multiple per-sample invocations. 2) key->multisample_fbo: means that the shader will be running on an actual multi-sampled framebuffer. This commit also adds a new "persample_dispatch" bit to prog_data which indicates that the shader should be run in persample mode. This way the state setup code doesn't have to look at the fragment program or GL state and can just pull that data out of the prog_data. In theory, this shuffle could mean more recompiles. However, in practice, we were shoving enough state into the key before that we were probably hitting a recompile on every per-sample shader anyway. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp40
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c19
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c20
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c22
6 files changed, 57 insertions, 54 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 3d1dc88..3fcd7e8 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -242,12 +242,11 @@ struct brw_wm_prog_key {
uint8_t iz_lookup;
bool stats_wm:1;
bool flat_shade:1;
- bool persample_shading:1;
unsigned nr_color_regions:5;
bool replicate_alpha:1;
bool render_to_fbo:1;
bool clamp_fragment_color:1;
- bool compute_pos_offset:1;
+ bool persample_interp:1;
bool multisample_fbo:1;
unsigned line_aa:2;
bool high_quality_derivatives:1;
@@ -386,6 +385,7 @@ struct brw_wm_prog_data {
bool early_fragment_tests;
bool no_8;
bool dual_src_blend;
+ bool persample_dispatch;
bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 89b30c7..5e08d71 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1195,8 +1195,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
inst->no_dd_clear = true;
inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
+ mod_centroid && !key->persample_interp,
+ mod_sample || key->persample_interp);
inst->predicate = BRW_PREDICATE_NORMAL;
inst->predicate_inverse = false;
if (devinfo->has_pln)
@@ -1204,8 +1204,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
} else {
emit_linterp(*attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
+ mod_centroid && !key->persample_interp,
+ mod_sample || key->persample_interp);
}
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
bld.MUL(*attr, *attr, this->pixel_w);
@@ -1262,10 +1262,10 @@ void
fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
{
assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+ brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
assert(dst.type == BRW_REGISTER_TYPE_F);
- if (key->compute_pos_offset) {
+ if (wm_prog_data->persample_dispatch) {
/* Convert int_sample_pos to floating point */
bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
@@ -1430,7 +1430,7 @@ fs_reg *
fs_visitor::emit_samplemaskin_setup()
{
assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key *) this->key;
+ brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
assert(devinfo->gen >= 6);
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
@@ -1438,7 +1438,7 @@ fs_visitor::emit_samplemaskin_setup()
fs_reg coverage_mask(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
BRW_REGISTER_TYPE_D));
- if (key->persample_shading) {
+ if (wm_prog_data->persample_dispatch) {
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
* and a mask representing which sample is being processed by the
* current shader invocation.
@@ -5098,7 +5098,6 @@ fs_visitor::setup_fs_payload_gen6()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
@@ -5151,9 +5150,19 @@ fs_visitor::setup_fs_payload_gen6()
}
}
- prog_data->uses_pos_offset = key->compute_pos_offset;
/* R31: MSAA position offsets. */
- if (prog_data->uses_pos_offset) {
+ if (prog_data->persample_dispatch &&
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS)) {
+ /* From the Ivy Bridge PRM documentation for 3DSTATE_PS:
+ *
+ * "MSDISPMODE_PERSAMPLE is required in order to select
+ * POSOFFSET_SAMPLE"
+ *
+ * So we can only really get sample positions if we are doing real
+ * per-sample dispatch. If we need gl_SamplePosition and we don't have
+ * persample dispatch, we hard-code it to 0.5.
+ */
+ prog_data->uses_pos_offset = true;
payload.sample_pos_reg = payload.num_regs;
payload.num_regs++;
}
@@ -5993,12 +6002,19 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->computed_stencil =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
+ prog_data->persample_dispatch =
+ key->multisample_fbo &&
+ (key->persample_interp ||
+ (shader->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID |
+ SYSTEM_BIT_SAMPLE_POS)) ||
+ shader->info.fs.uses_sample_qualifier);
+
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
prog_data->barycentric_interp_modes =
brw_compute_barycentric_interp_modes(compiler->devinfo,
key->flat_shade,
- key->persample_shading,
+ key->persample_interp,
shader);
fs_visitor v(compiler, log_data, mem_ctx, key,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ced9708..395b0b8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -250,8 +250,8 @@ brw_wm_debug_recompile(struct brw_context *brw,
old_key->stats_wm, key->stats_wm);
found |= key_debug(brw, "flat shading",
old_key->flat_shade, key->flat_shade);
- found |= key_debug(brw, "per-sample shading",
- old_key->persample_shading, key->persample_shading);
+ found |= key_debug(brw, "per-sample interpolation",
+ old_key->persample_interp, key->persample_interp);
found |= key_debug(brw, "number of color buffers",
old_key->nr_color_regions, key->nr_color_regions);
found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
@@ -528,15 +528,14 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
/* _NEW_BUFFERS _NEW_MULTISAMPLE */
/* Ignore sample qualifier while computing this flag. */
- key->persample_shading =
- _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
+ if (ctx->Multisample.Enabled) {
+ key->persample_interp =
+ ctx->Multisample.SampleShading &&
+ (ctx->Multisample.MinSampleShadingValue *
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1);
- key->compute_pos_offset =
- _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
- fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
-
- key->multisample_fbo = ctx->Multisample.Enabled &&
- _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ }
/* BRW_NEW_VUE_MAP_GEOM_OUT */
if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 335920c..dd33926 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -130,12 +130,10 @@ gen6_upload_wm_state(struct brw_context *brw,
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
- assert(min_inv_per_frag >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_inv_per_frag == 1) {
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
dw4 |= (prog_data->base.dispatch_grf_start_reg <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
@@ -198,7 +196,7 @@ gen6_upload_wm_state(struct brw_context *brw,
else
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
- if (min_inv_per_frag > 1)
+ if (prog_data->persample_dispatch)
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
else {
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 2c3930f..945fbbd 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -91,7 +91,7 @@ upload_wm_state(struct brw_context *brw)
else
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
- if (_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false) > 1)
+ if (prog_data->persample_dispatch)
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
else
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
@@ -152,7 +152,6 @@ gen7_upload_ps_state(struct brw_context *brw,
bool enable_dual_src_blend, unsigned sample_mask,
unsigned fast_clear_op)
{
- struct gl_context *ctx = &brw->ctx;
uint32_t dw2, dw4, dw5, ksp0, ksp2;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
@@ -216,18 +215,15 @@ gen7_upload_ps_state(struct brw_context *brw,
if (prog_data->num_varying_inputs != 0)
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
- /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
- * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
- * is successfully compiled. In majority of the cases that bring us
- * better performance than 'SIMD8 only' dispatch.
- */
- int min_inv_per_frag =
- _mesa_get_min_invocations_per_fragment(ctx, fp, false);
- assert(min_inv_per_frag >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_inv_per_frag == 1) {
+
+ /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
+ * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
+ * is successfully compiled. In majority of the cases that bring us
+ * better performance than 'SIMD8 only' dispatch.
+ */
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
dw5 |= (prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 9269a79..d3e1ca3 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -52,8 +52,7 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (prog_data->uses_src_w)
dw1 |= GEN8_PSX_USES_SOURCE_W;
- if (multisampled_fbo &&
- _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
+ if (prog_data->persample_dispatch)
dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
if (prog_data->uses_sample_mask) {
@@ -192,7 +191,6 @@ gen8_upload_ps_state(struct brw_context *brw,
const struct brw_wm_prog_data *prog_data,
uint32_t fast_clear_op)
{
- struct gl_context *ctx = &brw->ctx;
uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
/* Initialize the execution mask with VMask. Otherwise, derivatives are
@@ -246,19 +244,15 @@ gen8_upload_ps_state(struct brw_context *brw,
dw6 |= fast_clear_op;
- /* _NEW_MULTISAMPLE
- * In case of non 1x per sample shading, only one of SIMD8 and SIMD16
- * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
- * is successfully compiled. In majority of the cases that bring us
- * better performance than 'SIMD8 only' dispatch.
- */
- int min_invocations_per_fragment =
- _mesa_get_min_invocations_per_fragment(ctx, fp, false);
- assert(min_invocations_per_fragment >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_invocations_per_fragment == 1) {
+
+ /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
+ * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
+ * is successfully compiled. In majority of the cases that bring us
+ * better performance than 'SIMD8 only' dispatch.
+ */
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
dw7 |= (prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);