diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2015-09-02 15:47:33 -0700 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2015-12-09 23:50:38 -0800 |
commit | d04612b60d98ff785646affaffc3d7243deecb74 (patch) | |
tree | 39ea323c62ff2bd577346aa58e6afbd07c74f573 /src/mesa/drivers/dri/i965/brw_context.c | |
parent | e288b4a133f1ea8208cd219545a72805ed5a91c6 (diff) | |
download | external_mesa3d-d04612b60d98ff785646affaffc3d7243deecb74.zip external_mesa3d-d04612b60d98ff785646affaffc3d7243deecb74.tar.gz external_mesa3d-d04612b60d98ff785646affaffc3d7243deecb74.tar.bz2 |
i965: Enable ARB_compute_shader extension on supported hardware
Enable ARB_compute_shader on gen7+, on hardware that supports the
OpenGL 4.3 requirement of a local group size of 1024.
With SIMD16 support, this is limited to Ivy Bridge and Haswell.
Broadwell will work with a local group size up to 896 on SIMD16,
meaning programs that use this size or lower should run when
MESA_EXTENSION_OVERRIDE=GL_ARB_compute_shader is set.
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_context.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 11 |
1 file changed, 6 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 88be907..1511dd5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -349,7 +349,9 @@ brw_initialize_context_constants(struct brw_context *brw)
       [MESA_SHADER_TESS_EVAL] = brw->gen >= 8,
       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
       [MESA_SHADER_FRAGMENT] = true,
-      [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
+      [MESA_SHADER_COMPUTE] =
+         (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
+         _mesa_extension_override_enables.ARB_compute_shader,
    };
 
    unsigned num_stages = 0;
@@ -638,7 +640,7 @@ brw_initialize_context_constants(struct brw_context *brw)
 }
 
 static void
-brw_adjust_cs_context_constants(struct brw_context *brw)
+brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
 {
    struct gl_context *ctx = &brw->ctx;
 
@@ -652,7 +654,7 @@ brw_adjust_cs_context_constants(struct brw_context *brw)
     */
    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 
-   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+   const uint32_t max_invocations = simd_size * max_threads;
    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
@@ -844,6 +846,7 @@ brwCreateContext(gl_api api,
    if (INTEL_DEBUG & DEBUG_PERF)
       brw->perf_debug = true;
 
+   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
    brw_initialize_context_constants(brw);
 
    ctx->Const.ResetStrategy = notify_reset
@@ -898,8 +901,6 @@ brwCreateContext(gl_api api,
    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 
-   brw_adjust_cs_context_constants(brw);
-
    /* Estimate the size of the mappable aperture into the GTT. There's an
     * ioctl to get the whole GTT size, but not one to get the mappable subset.
     * It turns out it's basically always 256MB, though some ancient hardware