summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_state_shaders.c
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-06-11 19:57:40 +0200
committerMarek Olšák <marek.olsak@amd.com>2016-07-05 00:47:13 +0200
commit5c92c21369ee3b4f52eb5aed183092ba3ee7e079 (patch)
treed1464436b7410d1169ffd0a8e003db54f8c9e422 /src/gallium/drivers/radeonsi/si_state_shaders.c
parent84824935cf28b72bac9f73787aadf20b95dea230 (diff)
downloadexternal_mesa3d-5c92c21369ee3b4f52eb5aed183092ba3ee7e079.zip
external_mesa3d-5c92c21369ee3b4f52eb5aed183092ba3ee7e079.tar.gz
external_mesa3d-5c92c21369ee3b4f52eb5aed183092ba3ee7e079.tar.bz2
radeonsi: do compilation from si_create_shader_selector asynchronously
Main shader parts and geometry shaders are compiled asynchronously by util_queue. si_create_shader_selector doesn't wait and returns immediately. si_draw_vbo (via si_shader_select) waits for completion. This has the best effect when shaders are compiled at app-loading time. It doesn't help much for shaders compiled on demand, even though VS+PS compilation should then take only as much time as the slower of the two. If an app creates more shaders, at most 4 threads will be used to compile them. Enabling debug output disables asynchronous compilation so that shader stats are printed in the correct order. (We could go even further and build variants asynchronously too, then emit draw calls without waiting and emit incomplete shader states, then force IB chaining to give the compiler more time, then sync the compilation at the IB flush and patch the IB with the correct shader states. This is great for compilation before draw calls, but there are some difficulties, such as scratch and tess states requiring the compiler output, and an on-disk shader cache will likely be a much better and simpler solution.) Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_shaders.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c39
1 files changed, 33 insertions, 6 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 799aa57..117cf4b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -991,7 +991,8 @@ static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_ctx_state *state,
union si_shader_key *key,
LLVMTargetMachineRef tm,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug,
+ bool wait)
{
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
@@ -1005,6 +1006,13 @@ static int si_shader_select_with_key(struct si_screen *sscreen,
if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
return 0;
+ /* This must be done before the mutex is locked, because async GS
+ * compilation calls this function too, and therefore must enter
+ * the mutex first.
+ */
+ if (wait)
+ util_queue_job_wait(&sel->ready);
+
pipe_mutex_lock(sel->mutex);
/* Find the shader variant. */
@@ -1057,7 +1065,7 @@ static int si_shader_select(struct pipe_context *ctx,
si_shader_selector_key(ctx, state->cso, &key);
return si_shader_select_with_key(sctx->screen, state, &key,
- sctx->tm, &sctx->b.debug);
+ sctx->tm, &sctx->b.debug, true);
}
static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
@@ -1094,10 +1102,19 @@ void si_init_shader_selector_async(void *job, int thread_index)
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm = sel->tm;
- struct pipe_debug_callback *debug = &sel->debug;
+ LLVMTargetMachineRef tm;
+ struct pipe_debug_callback *debug;
unsigned i;
+ if (thread_index >= 0) {
+ assert(thread_index < ARRAY_SIZE(sscreen->tm));
+ tm = sscreen->tm[thread_index];
+ debug = NULL;
+ } else {
+ tm = sel->tm;
+ debug = &sel->debug;
+ }
+
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
@@ -1172,7 +1189,8 @@ void si_init_shader_selector_async(void *job, int thread_index)
break;
}
- if (si_shader_select_with_key(sscreen, &state, &key, tm, debug))
+ if (si_shader_select_with_key(sscreen, &state, &key, tm, debug,
+ false))
fprintf(stderr, "radeonsi: can't create a monolithic shader\n");
}
}
@@ -1304,8 +1322,14 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
S_02880C_EXEC_ON_NOOP(1);
pipe_mutex_init(sel->mutex);
+ util_queue_fence_init(&sel->ready);
- si_init_shader_selector_async(sel, -1);
+ if (sctx->b.debug.debug_message ||
+ !util_queue_is_initialized(&sscreen->shader_compiler_queue))
+ si_init_shader_selector_async(sel, -1);
+ else
+ util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+ &sel->ready, si_init_shader_selector_async);
return sel;
}
@@ -1442,6 +1466,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
};
+ util_queue_job_wait(&sel->ready);
+
if (current_shader[sel->type]->cso == sel) {
current_shader[sel->type]->cso = NULL;
current_shader[sel->type]->current = NULL;
@@ -1456,6 +1482,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
if (sel->main_shader_part)
si_delete_shader(sctx, sel->main_shader_part);
+ util_queue_fence_destroy(&sel->ready);
pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);