summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorFrancisco Jerez <currojerez@riseup.net>2015-09-03 18:15:19 +0300
committerFrancisco Jerez <currojerez@riseup.net>2015-12-09 13:46:05 +0200
commit95ad0bd33ba491c2e34ce02f4ec2d75ffffe16ea (patch)
tree002ae2cd306ade0f85273cf9f71d9e75625ca46c /src/mesa
parentfa1300f75e80f32a0fd40ed53cfdeaecc4c2f012 (diff)
downloadexternal_mesa3d-95ad0bd33ba491c2e34ce02f4ec2d75ffffe16ea.zip
external_mesa3d-95ad0bd33ba491c2e34ce02f4ec2d75ffffe16ea.tar.gz
external_mesa3d-95ad0bd33ba491c2e34ce02f4ec2d75ffffe16ea.tar.bz2
i965: Calculate appropriate L3 partition weights for the current pipeline state.
This calculates a rather conservative partitioning of the L3 cache based on the shaders currently bound to the pipeline and whether they use SLM, atomics, images or scratch space. The result is intended to be fine-tuned later on based on other pipeline state. Note that the L3 partitioning calculated for VLV in the non-SLM non-DC case differs from the hardware defaults in that it doesn't include a DC partition and has twice as much RO cache space -- This is an intentional functional change that improves performance in several bandwidth-bound benchmarks on VLV (5% significance): SynMark OglTexFilterAniso by 14.18%, SynMark OglTexFilterTri by 7.15%, Unigine Heaven by 4.91%, SynMark OglShMapPcf by 2.15%, GpuTest Fur by 1.83%, SynMark OglDrvRes by 1.80%, SynMark OglVsTangent by 1.71%, and a few other benchmarks from the Finnish system by less than 1%. Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h1
-rw-r--r--src/mesa/drivers/dri/i965/gen7_l3_state.c53
2 files changed, 54 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index b2251bf..218d9c7 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -300,6 +300,7 @@ struct brw_stage_prog_data {
unsigned curb_read_length;
unsigned total_scratch;
+ unsigned total_shared;
/**
* Register where the thread expects to find input data from the URB
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index dea204d..557b744 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -262,6 +262,59 @@ get_l3_config(const struct brw_device_info *devinfo, struct brw_l3_weights w0)
}
/**
+ * Return a reasonable default L3 configuration for the specified device based
+ * on whether SLM and DC are required. In the non-SLM non-DC case the result
+ * is intended to approximately resemble the hardware defaults.
+ */
+static struct brw_l3_weights
+get_default_l3_weights(const struct brw_device_info *devinfo,
+ bool needs_dc, bool needs_slm)
+{
+ struct brw_l3_weights w = {{ 0 }};
+
+ w.w[L3P_SLM] = needs_slm;
+ w.w[L3P_URB] = 1.0;
+
+ if (devinfo->gen >= 8) {
+ w.w[L3P_ALL] = 1.0;
+ } else {
+ w.w[L3P_DC] = needs_dc ? 0.1 : 0;
+ w.w[L3P_RO] = devinfo->is_baytrail ? 0.5 : 1.0;
+ }
+
+ return norm_l3_weights(w);
+}
+
+/**
+ * Calculate the desired L3 partitioning based on the current state of the
+ * pipeline. For now this simply returns the conservative defaults calculated
+ * by get_default_l3_weights(), but we could probably do better by gathering
+ * more statistics from the pipeline state (e.g. guess of expected URB usage
+ * and bound surfaces), or by using feed-back from performance counters.
+ */
+static struct brw_l3_weights
+get_pipeline_state_l3_weights(const struct brw_context *brw)
+{
+ const struct brw_stage_state *stage_states[] = {
+ &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+ };
+ bool needs_dc = false, needs_slm = false;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(stage_states); i++) {
+ const struct gl_shader_program *prog =
+ brw->ctx._Shader->CurrentProgram[stage_states[i]->stage];
+ const struct brw_stage_prog_data *prog_data = stage_states[i]->prog_data;
+
+ needs_dc |= (prog && prog->NumAtomicBuffers) ||
+ (prog_data && (prog_data->total_scratch || prog_data->nr_image_params));
+ needs_slm |= prog_data && prog_data->total_shared;
+ }
+
+ return get_default_l3_weights(brw->intelScreen->devinfo,
+ needs_dc, needs_slm);
+}
+
+/**
* Program the hardware to use the specified L3 configuration.
*/
static void