From 9a4206379b0e36d440481ae89b98467ed53dc86b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 7 Nov 2016 09:05:59 -0800 Subject: vc4: Don't abort when a shader compile fails. It's much better to just skip the draw call entirely. Getting this information out of register allocation will also be useful for implementing threaded fragment shaders, which will need to retry non-threaded if RA fails. Cc: (cherry picked from commit 4d019bd703e7c20d56d5b858577607115b4926a3) --- src/gallium/drivers/vc4/vc4_context.h | 8 +++++++- src/gallium/drivers/vc4/vc4_draw.c | 5 ++++- src/gallium/drivers/vc4/vc4_program.c | 18 ++++++++++++++---- src/gallium/drivers/vc4/vc4_qir.h | 1 + src/gallium/drivers/vc4/vc4_qpu_emit.c | 5 ++++- src/gallium/drivers/vc4/vc4_register_allocate.c | 3 ++- 6 files changed, 32 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 313630a..c164eba 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -156,6 +156,12 @@ struct vc4_compiled_shader { bool disable_early_z; + /* Set if the compile failed, likely due to register allocation + * failure. In this case, we have no shader to run and should not try + * to do any draws. + */ + bool failed; + uint8_t num_inputs; /* Byte offsets for the start of the vertex attributes 0-7, and the @@ -449,7 +455,7 @@ void vc4_flush_jobs_reading_resource(struct vc4_context *vc4, void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c); -void vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); bool vc4_rt_format_supported(enum pipe_format f); bool vc4_rt_format_is_565(enum pipe_format f); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 61c5842..c5afc0c 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -307,7 +307,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } vc4_start_draw(vc4); - vc4_update_compiled_shaders(vc4, info->mode); + if (!vc4_update_compiled_shaders(vc4, info->mode)) { + debug_warn_once("shader compile failed, skipping draw call.\n"); + return; + } vc4_emit_state(pctx); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0145488..fe07d91 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2437,9 +2437,15 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } } - copy_uniform_state_to_shader(shader, c); - shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, - c->qpu_inst_count * sizeof(uint64_t)); + shader->failed = c->failed; + if (c->failed) { + shader->failed = true; + } else { + copy_uniform_state_to_shader(shader, c); + shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, + c->qpu_inst_count * + sizeof(uint64_t)); + } /* Copy the compiler UBO range state to the compiled shader, dropping * out arrays that were never referenced by an indirect load. @@ -2642,11 +2648,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode) } } -void +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode) { vc4_update_compiled_fs(vc4, prim_mode); vc4_update_compiled_vs(vc4, prim_mode); + + return !(vc4->prog.cs->failed || + vc4->prog.vs->failed || + vc4->prog.fs->failed); } static uint32_t diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4d41c42..c76aeb2 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -523,6 +523,7 @@ struct vc4_compile { uint32_t program_id; uint32_t variant_id; + bool failed; }; /* Special nir_load_input intrinsic index for loading the current TLB diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 4d371c0..eedee55 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -565,10 +565,13 @@ vc4_generate_code_block(struct vc4_compile *c, void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { - struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); struct qblock *start_block = list_first_entry(&c->blocks, struct qblock, link); + struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); + if (!temp_registers) + return; + switch (c->stage) { case QSTAGE_VERT: case QSTAGE_COORD: diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index fc44764..6c99b05 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -323,7 +323,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) if (!ok) { fprintf(stderr, "Failed to register allocate:\n"); qir_dump(c); - abort(); + c->failed = true; + return NULL; } for (uint32_t i = 0; i < c->num_temps; i++) { -- cgit v1.1 From 64d7d70c5b4722f53f6080b35ec516462f1e191b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 7 Nov 2016 12:25:47 -0800 Subject: vc4: Clamp the shadow comparison value. Fixes piglit glsl-fs-shadow2D-clamp-z. Cc: (cherry picked from commit 08d51487e3b8cfb14ca2ece9545b2e2ed344e3cc) --- src/gallium/drivers/vc4/vc4_program.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index fe07d91..05e2021 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -451,6 +451,15 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg u0 = qir_uniform_f(c, 0.0f); struct qreg u1 = qir_uniform_f(c, 1.0f); if (c->key->tex[unit].compare_mode) { + /* From the GL_ARB_shadow spec: + * + * "Let Dt (D subscript t) be the depth texture + * value, in the range [0, 1]. Let R be the + * interpolated texture coordinate clamped to the + * range [0, 1]." + */ + compare = qir_SAT(c, compare); + switch (c->key->tex[unit].compare_func) { case PIPE_FUNC_NEVER: depth_output = qir_uniform_f(c, 0.0f); -- cgit v1.1 From fd5fe00f7bb672cad9da19329acac88dcc2086f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 11 Nov 2016 14:04:42 -0800 Subject: vc4: Fix register class handling of DDX/DDY arguments. I had this exactly backwards, but apparently the piglit tests were all landing in r0-r3 anyway. Cc: "13.0" (cherry picked from commit 977d8b526b983c8d19df00af224033389f8ab7c8) --- src/gallium/drivers/vc4/vc4_register_allocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 6c99b05..ab343ee 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -247,7 +247,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_ROT_MUL: assert(inst->src[0].file == QFILE_TEMP); - class_bits[inst->src[0].index] &= ~CLASS_BIT_R0_R3; + class_bits[inst->src[0].index] &= CLASS_BIT_R0_R3; break; default: -- cgit v1.1 From 3d5b40fa763acb7c1564f9b4d0785f582210927b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 18 Nov 2016 15:18:10 +0100 Subject: radeonsi: store group_size_variable in struct si_compute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For compute shaders, we free the selector after the shader has been compiled, so we need to save this bit somewhere else. Also, make sure that this type of bug cannot re-appear, by NULL-ing the selector pointer after we're done with it. This bug has been there since the feature was added, but was only exposed in piglit arb_compute_variable_group_size-local-size by commit 9bfee7047b70cb0aa026ca9536465762f96cb2b1 (which is totally unrelated). Cc: 13.0 Reviewed-by: Marek Olšák (cherry picked from commit 42d5e91a2ae235c007c5d17935be9bb1c4ff388e) --- src/gallium/drivers/radeonsi/si_compute.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e785106..a35187c 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -42,7 +42,8 @@ struct si_compute { struct si_shader shader; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; - bool use_code_object_v2; + unsigned use_code_object_v2 : 1; + unsigned variable_group_size : 1; }; struct dispatch_packet { @@ -147,7 +148,11 @@ static void *si_create_compute_state( S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | S_00B84C_LDS_SIZE(shader->config.lds_size); + program->variable_group_size = + sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; + FREE(sel.tokens); + program->shader.selector = NULL; } else { const struct pipe_llvm_program_header *header; const char *code; @@ -607,14 +612,12 @@ static void si_setup_tgsi_grid(struct si_context *sctx, } } else { struct si_compute *program = sctx->cs_shader_state.program; - bool variable_group_size = - program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; - radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3); + radeon_set_sh_reg_seq(cs, grid_size_reg, program->variable_group_size ? 6 : 3); radeon_emit(cs, info->grid[0]); radeon_emit(cs, info->grid[1]); radeon_emit(cs, info->grid[2]); - if (variable_group_size) { + if (program->variable_group_size) { radeon_emit(cs, info->block[0]); radeon_emit(cs, info->block[1]); radeon_emit(cs, info->block[2]); -- cgit v1.1