summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/gen7_cs_state.c
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2016-06-08 22:21:22 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> 2016-06-12 00:38:26 -0700
commit: 87d062a94080373995170f51063a9649c96c6dea (patch)
tree: a0057a7ebd072ec6104b83d17fb6ec7f396f60c7 /src/mesa/drivers/dri/i965/gen7_cs_state.c
parent: 3f48548a6f65fe90b97956c7be73268917c6f2f9 (diff)
download: external_mesa3d-87d062a94080373995170f51063a9649c96c6dea.zip
external_mesa3d-87d062a94080373995170f51063a9649c96c6dea.tar.gz
external_mesa3d-87d062a94080373995170f51063a9649c96c6dea.tar.bz2
i965: Fix shared local memory size for Gen9+.
Skylake changes the representation of shared local memory size:

 Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 -------------------------------------------------------------------
 Gen7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 -------------------------------------------------------------------
 Gen9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |

The old formula would substantially underallocate the amount of space.
This fixes GPU hangs on Skylake when running with full thread counts.

v2: Fix the Vulkan driver too, use a helper function, and fix the table
    in the comments and commit message.

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen7_cs_state.c')
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_cs_state.c | 11
1 files changed, 2 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 750aa2c..a71a595 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -45,6 +45,7 @@ brw_upload_cs_state(struct brw_context *brw)
struct brw_stage_state *stage_state = &brw->cs.base;
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+ const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
brw->vtbl.emit_buffer_surface_state(
@@ -147,15 +148,7 @@ brw_upload_cs_state(struct brw_context *brw)
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
assert(cs_prog_data->threads <= brw->max_cs_threads);
- assert(prog_data->total_shared <= 64 * 1024);
- uint32_t slm_size = 0;
- if (prog_data->total_shared > 0) {
- /* slm_size is in 4k increments, but must be a power of 2. */
- slm_size = 4 * 1024;
- while (slm_size < prog_data->total_shared)
- slm_size <<= 1;
- slm_size /= 4 * 1024;
- }
+ const uint32_t slm_size = encode_slm_size(devinfo, prog_data->total_shared);
desc[dw++] =
SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |