summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/gen7_cs_state.c
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2016-06-09 16:56:31 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> 2016-06-12 00:40:14 -0700
commit: 147a90d82a5de637f968e0d5f383cabcb792f1ce (patch)
tree: 274f226d15f40f96c413b767e79f40d532096f41 /src/mesa/drivers/dri/i965/gen7_cs_state.c
parent: a7d029d3dfac1da2701be75ff4d1589ac562e916 (diff)
downloadexternal_mesa3d-147a90d82a5de637f968e0d5f383cabcb792f1ce.zip
external_mesa3d-147a90d82a5de637f968e0d5f383cabcb792f1ce.tar.gz
external_mesa3d-147a90d82a5de637f968e0d5f383cabcb792f1ce.tar.bz2
i965: Fix Haswell CS per-thread scratch space encoding.
Most scratch stages use power of two sizes, in kilobytes, where 0 means 1kB. But compute shaders on Haswell have a minimum of 2kB, and use a representation where 0 = 2kB. This meant that we were effectively telling the hardware to allocate each thread twice as much space as we meant to, while simultaneously not allocating that much space in the buffer, leading to overflows. Note that the existing code is completely wrong for Ivybridge, but that will take additional work to sort out, so I've left it as is for now. A subsequent commit will take care of that. Together with the previous patches, this fixes rendering corruption on Synmark's Gl43CSDof on Haswell. Cc: "12.0" <mesa-stable@lists.freedesktop.org> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen7_cs_state.c')
-rw-r--r--src/mesa/drivers/dri/i965/gen7_cs_state.c16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index a71a595..42cd61f 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -64,14 +64,26 @@ brw_upload_cs_state(struct brw_context *brw)
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
if (prog_data->total_scratch) {
- if (brw->gen >= 8)
+ if (brw->gen >= 8) {
+ /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
+ * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
+ */
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(prog_data->total_scratch) - 11);
- else
+ } else if (brw->is_haswell) {
+ /* Haswell's Per Thread Scratch Space is in the range [0, 10]
+ * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
+ */
+ OUT_RELOC(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->total_scratch) - 12);
+ } else {
+ /* FIXME: This encoding is wrong for Ivybridge; a subsequent commit will fix it. */
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(prog_data->total_scratch) - 11);
+ }
} else {
OUT_BATCH(0);
if (brw->gen >= 8)