summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_context.c
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2015-04-29 10:54:17 -0700
committerJordan Justen <jordan.l.justen@intel.com>2015-05-02 00:49:59 -0700
commitcb18f3f0213c010e657fd0b41e681e960a20b790 (patch)
tree4acfd9df56411501965c1afdb6ea3190ec36d90c /src/mesa/drivers/dri/i965/brw_context.c
parent73cb2d3a73425c5efa4f98b71393e7dad4f387c7 (diff)
downloadexternal_mesa3d-cb18f3f0213c010e657fd0b41e681e960a20b790.zip
external_mesa3d-cb18f3f0213c010e657fd0b41e681e960a20b790.tar.gz
external_mesa3d-cb18f3f0213c010e657fd0b41e681e960a20b790.tar.bz2
i965/cs: Set invocation counts based on max_cs_threads
For ES, we set the max counts based on SIMD8, which is currently accurate. For desktop GL, we set the max counts based on SIMD16, which can fail in some cases where a SIMD16 program is not currently supported. Therefore, this value is not currently accurate, but will work fine in many cases, and lets us run more test cases. Eventually we want to always be able to generate a SIMD16 program. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_context.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 029091e..1f0da35 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -612,6 +612,28 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
}
+static void
+brw_adjust_cs_context_constants(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ /* Compute work group limits are max_cs_threads times the SIMD width.
+ * For ES, we set these constants based on SIMD8.
+ * TODO: Once we can always generate SIMD16, we should update this.
+ *
+ * For GL, we assume we can generate a SIMD16 program, but this currently
+ * is not always true. This allows us to run more test cases, and will be
+ * required based on desktop GL compute shader requirements.
+ */
+ const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
+
+ const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+ ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
+ ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
+ ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
+ ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
+}
+
/**
* Process driconf (drirc) options, setting appropriate context flags.
*
@@ -843,6 +865,8 @@ brwCreateContext(gl_api api,
brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
+ brw_adjust_cs_context_constants(brw);
+
/* Estimate the size of the mappable aperture into the GTT. There's an
* ioctl to get the whole GTT size, but not one to get the mappable subset.
* It turns out it's basically always 256MB, though some ancient hardware