summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_context.c
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2016-06-14 15:04:34 -0700
committerJordan Justen <jordan.l.justen@intel.com>2016-08-26 10:09:22 -0700
commit30fee52036ac5e0180073ace4a8fd760556495be (patch)
tree250b344a10caa56308bf01e4031a86c7e8980730 /src/mesa/drivers/dri/i965/brw_context.c
parent861c9cbee3d741ea332a9ceee8ae64db49f114c2 (diff)
downloadexternal_mesa3d-30fee52036ac5e0180073ace4a8fd760556495be.zip
external_mesa3d-30fee52036ac5e0180073ace4a8fd760556495be.tar.gz
external_mesa3d-30fee52036ac5e0180073ace4a8fd760556495be.tar.bz2
i965/hsw: Don't advertise more than 64 threads for compute shaders
thread_width_max in the GPGPU walker command limits us to a maximum of 64 threads. This fixes a crash on Haswell in the OpenGLES 3.1 conformance test suite which tests the advertised limits of the max invocation counts. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_context.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c39
1 files changed, 25 insertions, 14 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 1364393..58cd03d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -769,12 +769,35 @@ brw_initialize_context_constants(struct brw_context *brw)
}
static void
-brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
+brw_initialize_cs_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct intel_screen *screen = brw->intelScreen;
+ const struct brw_device_info *devinfo = screen->devinfo;
+
+ /* FINISHME: Do this for all platforms that the kernel supports */
+ if (brw->is_cherryview &&
+ screen->subslice_total > 0 && screen->eu_total > 0) {
+ /* Logical CS threads = EUs per subslice * 7 threads per EU */
+ brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+
+ /* Fuse configurations may give more threads than expected, never less. */
+ if (brw->max_cs_threads < devinfo->max_cs_threads)
+ brw->max_cs_threads = devinfo->max_cs_threads;
+ } else {
+ brw->max_cs_threads = devinfo->max_cs_threads;
+ }
+
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
+ *
+ * We don't advertise more than 64 threads, because we are limited to 64 by
+ * our usage of thread_width_max in the gpgpu walker command. This only
+ * currently impacts Haswell, which otherwise might be able to advertise 70
+ * threads. With SIMD32 and 64 threads, Haswell still provides twice the
+ * required the number of invocation needed for ARB_compute_shader.
*/
+ const unsigned max_threads = MIN2(64, brw->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
@@ -978,7 +1001,7 @@ brwCreateContext(gl_api api,
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
- brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
+ brw_initialize_cs_context_constants(brw);
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
@@ -1025,18 +1048,6 @@ brwCreateContext(gl_api api,
brw->max_ds_threads = devinfo->max_ds_threads;
brw->max_gs_threads = devinfo->max_gs_threads;
brw->max_wm_threads = devinfo->max_wm_threads;
- /* FINISHME: Do this for all platforms that the kernel supports */
- if (brw->is_cherryview &&
- screen->subslice_total > 0 && screen->eu_total > 0) {
- /* Logical CS threads = EUs per subslice * 7 threads per EU */
- brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
-
- /* Fuse configurations may give more threads than expected, never less. */
- if (brw->max_cs_threads < devinfo->max_cs_threads)
- brw->max_cs_threads = devinfo->max_cs_threads;
- } else {
- brw->max_cs_threads = devinfo->max_cs_threads;
- }
brw->urb.size = devinfo->urb.size;
brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;