summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/compiler/nir/nir.h 2
-rw-r--r-- src/compiler/nir/nir_opt_peephole_select.c 82
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 2
4 files changed, 57 insertions, 31 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index aac247c..8d1afb9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2600,7 +2600,7 @@ bool nir_opt_dead_cf(nir_shader *shader);
bool nir_opt_gcm(nir_shader *shader, bool value_number);
-bool nir_opt_peephole_select(nir_shader *shader);
+bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
index 633e9f4..6a73d73 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -32,23 +32,33 @@
* Implements a small peephole optimization that looks for
*
* if (cond) {
- * <empty>
+ * <then SSA defs>
* } else {
- * <empty>
+ * <else SSA defs>
* }
* phi
* ...
* phi
*
- * and replaces it with a series of selects. It can also handle the case
- * where, instead of being empty, the if may contain some move operations
- * whose only use is one of the following phi nodes. This happens all the
- * time when the SSA form comes from a conditional assignment with a
- * swizzle.
+ * and replaces it with:
+ *
+ * <then SSA defs>
+ * <else SSA defs>
+ * bcsel
+ * ...
+ * bcsel
+ *
+ * where the SSA defs are ALU operations or other cheap instructions (not
+ * texturing, for example).
+ *
+ * If the number of ALU operations in the branches is greater than the limit
+ * parameter, then the optimization is skipped. In limit=0 mode, the SSA defs
+ * must only be MOVs which we expect to get copy-propagated away once they're
+ * out of the inner blocks.
*/
static bool
-block_check_for_allowed_instrs(nir_block *block)
+block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
{
nir_foreach_instr(instr, block) {
switch (instr->type) {
@@ -67,6 +77,11 @@ block_check_for_allowed_instrs(nir_block *block)
}
break;
+ case nir_intrinsic_load_uniform:
+ if (!alu_ok)
+ return false;
+ break;
+
default:
return false;
}
@@ -89,29 +104,36 @@ block_check_for_allowed_instrs(nir_block *block)
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
- /* It must be a move-like operation. */
break;
default:
- return false;
+ if (!alu_ok) {
+ /* It must be a move-like operation. */
+ return false;
+ }
+ break;
}
- /* Can't handle saturate */
- if (mov->dest.saturate)
- return false;
-
/* It must be SSA */
if (!mov->dest.dest.is_ssa)
return false;
- /* It cannot have any if-uses */
- if (!list_empty(&mov->dest.dest.ssa.if_uses))
- return false;
+ if (alu_ok) {
+ (*count)++;
+ } else {
+ /* Can't handle saturate */
+ if (mov->dest.saturate)
+ return false;
- /* The only uses of this definition must be phi's in the successor */
- nir_foreach_use(use, &mov->dest.dest.ssa) {
- if (use->parent_instr->type != nir_instr_type_phi ||
- use->parent_instr->block != block->successors[0])
+ /* It cannot have any if-uses */
+ if (!list_empty(&mov->dest.dest.ssa.if_uses))
return false;
+
+ /* The only uses of this definition must be phi's in the successor */
+ nir_foreach_use(use, &mov->dest.dest.ssa) {
+ if (use->parent_instr->type != nir_instr_type_phi ||
+ use->parent_instr->block != block->successors[0])
+ return false;
+ }
}
break;
}
@@ -125,7 +147,7 @@ block_check_for_allowed_instrs(nir_block *block)
}
static bool
-nir_opt_peephole_select_block(nir_block *block, void *mem_ctx)
+nir_opt_peephole_select_block(nir_block *block, void *mem_ctx, unsigned limit)
{
if (nir_cf_node_is_first(&block->cf_node))
return false;
@@ -147,8 +169,12 @@ nir_opt_peephole_select_block(nir_block *block, void *mem_ctx)
nir_block *else_block = nir_cf_node_as_block(else_node);
/* ... and those blocks must only contain "allowed" instructions. */
- if (!block_check_for_allowed_instrs(then_block) ||
- !block_check_for_allowed_instrs(else_block))
+ unsigned count = 0;
+ if (!block_check_for_allowed_instrs(then_block, &count, limit != 0) ||
+ !block_check_for_allowed_instrs(else_block, &count, limit != 0))
+ return false;
+
+ if (count > limit)
return false;
/* At this point, we know that the previous CFG node is an if-then
@@ -212,13 +238,13 @@ nir_opt_peephole_select_block(nir_block *block, void *mem_ctx)
}
static bool
-nir_opt_peephole_select_impl(nir_function_impl *impl)
+nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit)
{
void *mem_ctx = ralloc_parent(impl);
bool progress = false;
nir_foreach_block_safe(block, impl) {
- progress |= nir_opt_peephole_select_block(block, mem_ctx);
+ progress |= nir_opt_peephole_select_block(block, mem_ctx, limit);
}
if (progress)
@@ -228,13 +254,13 @@ nir_opt_peephole_select_impl(nir_function_impl *impl)
}
bool
-nir_opt_peephole_select(nir_shader *shader)
+nir_opt_peephole_select(nir_shader *shader, unsigned limit)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_peephole_select_impl(function->impl);
+ progress |= nir_opt_peephole_select_impl(function->impl, limit);
}
return progress;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 986a1ff..81c6716 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1430,7 +1430,7 @@ vc4_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
- NIR_PASS(progress, s, nir_opt_peephole_select);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index fbc84c4..744865b 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -416,7 +416,7 @@ nir_optimize(nir_shader *nir, bool is_scalar)
OPT(nir_copy_prop);
OPT(nir_opt_dce);
OPT(nir_opt_cse);
- OPT(nir_opt_peephole_select);
+ OPT(nir_opt_peephole_select, 0);
OPT(nir_opt_algebraic);
OPT(nir_opt_constant_folding);
OPT(nir_opt_dead_cf);