summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2016-07-29 01:29:09 -0700
committerKenneth Graunke <kenneth@whitecape.org>2016-08-04 00:40:10 -0700
commit12a912586f11ccbc4612532d5ceaf1bdd0cdb45a (patch)
treeac76f4daa209bed809f2dc0071d9a484b314bc5f
parent67c5e843b9ae2c72bcf5c84f8744c1bd23fc262a (diff)
downloadexternal_mesa3d-12a912586f11ccbc4612532d5ceaf1bdd0cdb45a.zip
external_mesa3d-12a912586f11ccbc4612532d5ceaf1bdd0cdb45a.tar.gz
external_mesa3d-12a912586f11ccbc4612532d5ceaf1bdd0cdb45a.tar.bz2
i965: Use a separate register for every access to an SSA undef.
Previously, we allocated a new VGRF for every undefined definition. Instead, this patch makes us allocate a new VGRF for every use of an undefined definition. This makes sure that undefined values are fully independent of one another, and have live ranges limited to their single use. This allows register coalescing to combine the source and destination of MOVs from undefined sources, eliminating the MOV altogether. On Broadwell: total instructions in shared programs: 11641187 -> 11640214 (-0.01%) instructions in affected programs: 70199 -> 69226 (-1.39%) helped: 213 HURT: 1 v2: Add a comment (based on Iago's suggested one). Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp22
2 files changed, 11 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index fc1e1c4..8b1ea79 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -204,8 +204,6 @@ public:
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
void nir_emit_load_const(const brw::fs_builder &bld,
nir_load_const_instr *instr);
- void nir_emit_undef(const brw::fs_builder &bld,
- nir_ssa_undef_instr *instr);
void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5236d0e..c1f413b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -445,7 +445,10 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_ssa_undef:
- nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
+ /* We create a new VGRF for undefs on every use (by handling
+ * them in get_nir_src()), rather than for each definition.
+ * This helps register coalescing eliminate MOVs from undef.
+ */
break;
case nir_instr_type_jump:
@@ -1489,21 +1492,18 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
nir_ssa_values[instr->def.index] = reg;
}
-void
-fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
-{
- const brw_reg_type reg_type =
- instr->def.bit_size == 32 ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
- nir_ssa_values[instr->def.index] =
- bld.vgrf(reg_type, instr->def.num_components);
-}
-
fs_reg
fs_visitor::get_nir_src(const nir_src &src)
{
fs_reg reg;
if (src.is_ssa) {
- reg = nir_ssa_values[src.ssa->index];
+ if (src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
+ const brw_reg_type reg_type = src.ssa->bit_size == 32 ?
+ BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
+ reg = bld.vgrf(reg_type, src.ssa->num_components);
+ } else {
+ reg = nir_ssa_values[src.ssa->index];
+ }
} else {
/* We don't handle indirects on locals */
assert(src.reg.indirect == NULL);