diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2015-08-05 09:16:59 -0700 |
---|---|---|
committer | Kenneth Graunke <kenneth@whitecape.org> | 2015-09-23 11:00:00 -0700 |
commit | df31c1850d14729e27513ae733110a668f6b6e95 (patch) | |
tree | 06d123e528dae4a2cb26e8e31dc4603658f3cf18 /src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | |
parent | 542d40d698a698dc656c7a64ddcea07060707555 (diff) | |
download | external_mesa3d-df31c1850d14729e27513ae733110a668f6b6e95.zip external_mesa3d-df31c1850d14729e27513ae733110a668f6b6e95.tar.gz external_mesa3d-df31c1850d14729e27513ae733110a668f6b6e95.tar.bz2 |
i965/gs: Use new NIR intrinsics.
By performing the vertex counting in NIR, we're able to elide a ton of
useless safety checks around every EmitVertex() call:
total instructions in shared programs: 3952 -> 3720 (-5.87%)
instructions in affected programs: 3491 -> 3259 (-6.65%)
helped: 11
HURT: 0
Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
on Haswell GT3e at 1024x768.
This should also make it easier to implement Broadwell's "Static Vertex
Count" feature someday.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b9694f6..7a5b945 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
    {
       /* If we're outputting 32 control data bits or less, then we can wait
        * until the shader is over to output them all. Otherwise we need to
@@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
          this->current_annotation = "emit vertex: Stream control data bits";
          set_stream_control_data_bits(stream_id);
       }
-
-      this->current_annotation = "emit vertex: increment vertex count";
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-               src_reg(1u)));
    }
-   emit(BRW_OPCODE_ENDIF);
 
    this->current_annotation = NULL;
 }
@@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
 void
 vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
 
 void