diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2015-09-24 18:21:59 -0700 |
---|---|---|
committer | Kenneth Graunke <kenneth@whitecape.org> | 2015-09-26 12:01:58 -0700 |
commit | f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9 (patch) | |
tree | 4ab41436c615886e74efccd2b10c329b29c2bb14 /src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | |
parent | bcef2abad7cf255b6ac112b9ebf0ff75e491c968 (diff) | |
download | external_mesa3d-f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9.zip external_mesa3d-f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9.tar.gz external_mesa3d-f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9.tar.bz2 |
i965: Implement "Static Vertex Count" geometry shader optimization.
Broadwell's 3DSTATE_GS contains new "Static Output" and "Static Vertex
Count" fields, which control a new optimization. Normally, geometry
shaders can output arbitrary numbers of vertices, which means that
resource allocation has to be done on the fly. However, if the number
of vertices is statically known, the hardware can pre-allocate resources
up front, which is more efficient.
Thanks to the new NIR GS intrinsics, this is easy. We just call the
function introduced in the previous commit to get the vertex count.
If it obtains a count, we stop emitting the extra 32-bit "Vertex Count"
field in the VUE, and instead fill out the 3DSTATE_GS fields.
Improves performance of Gl32GSCloth by 5.16347% +/- 0.12611% (n=91)
on my Lenovo X250 laptop (Broadwell GT2) at 1024x768.
shader-db statistics for geometry shaders only:
total instructions in shared programs: 3227 -> 3207 (-0.62%)
instructions in affected programs: 242 -> 222 (-8.26%)
helped: 10
v2: Don't break non-NIR paths (just skip this optimization).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 11 |
1 files changed, 7 insertions, 4 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index ff5bd98..acf0501 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -234,17 +234,20 @@ vec4_gs_visitor::emit_thread_end()
     */
    int base_mrf = 1;
 
+   bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+
    current_annotation = "thread end";
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;
-   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+   if (devinfo->gen < 8 || !static_vertex_count)
+      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       emit_shader_time_end();
    inst = emit(GS_OPCODE_THREAD_END);
    inst->base_mrf = base_mrf;
-   inst->mlen = devinfo->gen >= 8 ? 2 : 1;
+   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
 }
 
 
@@ -284,7 +287,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
    /* We need to increment Global Offset by 1 to make room for Broadwell's
     * extra "Vertex Count" payload at the beginning of the URB entry.
     */
-   if (devinfo->gen >= 8)
+   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
       inst->offset++;
 
    inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
@@ -421,7 +424,7 @@ vec4_gs_visitor::emit_control_data_bits()
        * URB entry. Since this is an OWord message, Global Offset is counted
        * in 128-bit units, so we must set it to 2.
        */
-      if (devinfo->gen >= 8)
+      if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
          inst->offset = 2;
       inst->base_mrf = base_mrf;
       inst->mlen = 2;
```