From 31a36ffbc81a4dd79b91bf0fc59f0e5f8d44dbd7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 3 Sep 2015 01:01:29 -0700 Subject: i965/gs: Fix extra level of indentation left by the previous commit. I left a bunch of code indented a level in the previous patch to make the diff easier to read. But now we should fix that. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 124 +++++++++++----------- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 102 +++++++++--------- 2 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 7a5b945..3cb1b4c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -484,76 +484,74 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0) return; - { - /* If we're outputting 32 control data bits or less, then we can wait - * until the shader is over to output them all. Otherwise we need to - * output them as we go. Now is the time to do it, since we're about to - * output the vertex_count'th vertex, so it's guaranteed that the - * control data bits associated with the (vertex_count - 1)th vertex are - * correct. + /* If we're outputting 32 control data bits or less, then we can wait + * until the shader is over to output them all. Otherwise we need to + * output them as we go. Now is the time to do it, since we're about to + * output the vertex_count'th vertex, so it's guaranteed that the + * control data bits associated with the (vertex_count - 1)th vertex are + * correct. + */ + if (c->control_data_header_size_bits > 32) { + this->current_annotation = "emit vertex: emit control data bits"; + /* Only emit control data bits if we've finished accumulating a batch + * of 32 bits. 
This is the case when: + * + * (vertex_count * bits_per_vertex) % 32 == 0 + * + * (in other words, when the last 5 bits of vertex_count * + * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some + * integer n (which is always the case, since bits_per_vertex is + * always 1 or 2), this is equivalent to requiring that the last 5-n + * bits of vertex_count are 0: + * + * vertex_count & (2^(5-n) - 1) == 0 + * + * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is + * equivalent to: + * + * vertex_count & (32 / bits_per_vertex - 1) == 0 */ - if (c->control_data_header_size_bits > 32) { - this->current_annotation = "emit vertex: emit control data bits"; - /* Only emit control data bits if we've finished accumulating a batch - * of 32 bits. This is the case when: - * - * (vertex_count * bits_per_vertex) % 32 == 0 - * - * (in other words, when the last 5 bits of vertex_count * - * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some - * integer n (which is always the case, since bits_per_vertex is - * always 1 or 2), this is equivalent to requiring that the last 5-n - * bits of vertex_count are 0: - * - * vertex_count & (2^(5-n) - 1) == 0 - * - * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is - * equivalent to: - * - * vertex_count & (32 / bits_per_vertex - 1) == 0 + vec4_instruction *inst = + emit(AND(dst_null_d(), this->vertex_count, + (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(IF(BRW_PREDICATE_NORMAL)); + { + /* If vertex_count is 0, then no control data bits have been + * accumulated yet, so we skip emitting them. 
*/ - vec4_instruction *inst = - emit(AND(dst_null_d(), this->vertex_count, - (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - + emit(CMP(dst_null_d(), this->vertex_count, 0u, + BRW_CONDITIONAL_NEQ)); emit(IF(BRW_PREDICATE_NORMAL)); - { - /* If vertex_count is 0, then no control data bits have been - * accumulated yet, so we skip emitting them. - */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, - BRW_CONDITIONAL_NEQ)); - emit(IF(BRW_PREDICATE_NORMAL)); - emit_control_data_bits(); - emit(BRW_OPCODE_ENDIF); - - /* Reset control_data_bits to 0 so we can start accumulating a new - * batch. - * - * Note: in the case where vertex_count == 0, this neutralizes the - * effect of any call to EndPrimitive() that the shader may have - * made before outputting its first vertex. - */ - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); - inst->force_writemask_all = true; - } + emit_control_data_bits(); emit(BRW_OPCODE_ENDIF); + + /* Reset control_data_bits to 0 so we can start accumulating a new + * batch. + * + * Note: in the case where vertex_count == 0, this neutralizes the + * effect of any call to EndPrimitive() that the shader may have + * made before outputting its first vertex. + */ + inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst->force_writemask_all = true; } + emit(BRW_OPCODE_ENDIF); + } - this->current_annotation = "emit vertex: vertex data"; - emit_vertex(); + this->current_annotation = "emit vertex: vertex data"; + emit_vertex(); - /* In stream mode we have to set control data bits for all vertices - * unless we have disabled control data bits completely (which we do - * do for GL_POINTS outputs that don't use streams). 
- */ - if (c->control_data_header_size_bits > 0 && - c->prog_data.control_data_format == - GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { - this->current_annotation = "emit vertex: Stream control data bits"; - set_stream_control_data_bits(stream_id); - } + /* In stream mode we have to set control data bits for all vertices + * unless we have disabled control data bits completely (which we do + * do for GL_POINTS outputs that don't use streams). + */ + if (c->control_data_header_size_bits > 0 && + c->prog_data.control_data_format == + GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { + this->current_annotation = "emit vertex: Stream control data bits"; + set_stream_control_data_bits(stream_id); } this->current_annotation = NULL; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 5cfff7b..4c9c960 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -172,64 +172,62 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "gen6 emit vertex"; - { - /* Buffer all output slots for this vertex in vertex_output */ - for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { - int varying = prog_data->vue_map.slot_to_varying[slot]; - if (varying != VARYING_SLOT_PSIZ) { - dst_reg dst(this->vertex_output); - dst.reladdr = ralloc(mem_ctx, src_reg); - memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - emit_urb_slot(dst, varying); - } else { - /* The PSIZ slot can pack multiple varyings in different channels - * and emit_urb_slot() will produce a MOV instruction for each of - * them. Since we are writing to an array, that will translate to - * possibly multiple MOV instructions with an array destination and - * each will generate a scratch write with the same offset into - * scratch space (thus, each one overwriting the previous). This is - * not what we want. 
What we will do instead is emit PSIZ to a - * a regular temporary register, then move that resgister into the - * array. This way we only have one instruction with an array - * destination and we only produce a single scratch write. - */ - dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type)); - emit_urb_slot(tmp, varying); - dst_reg dst(this->vertex_output); - dst.reladdr = ralloc(mem_ctx, src_reg); - memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - vec4_instruction *inst = emit(MOV(dst, src_reg(tmp))); - inst->force_writemask_all = true; - } - - emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); - } - - /* Now buffer flags for this vertex */ - dst_reg dst(this->vertex_output); - dst.reladdr = ralloc(mem_ctx, src_reg); - memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - if (c->gp->program.OutputType == GL_POINTS) { - /* If we are outputting points, then every vertex has PrimStart and - * PrimEnd set. - */ - emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | - URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)); - emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + /* Buffer all output slots for this vertex in vertex_output */ + for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { + int varying = prog_data->vue_map.slot_to_varying[slot]; + if (varying != VARYING_SLOT_PSIZ) { + dst_reg dst(this->vertex_output); + dst.reladdr = ralloc(mem_ctx, src_reg); + memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); + emit_urb_slot(dst, varying); } else { - /* Otherwise, we can only set the PrimStart flag, which we have stored - * in the first_vertex register. We will have to wait until we execute - * EndPrimitive() or we end the thread to set the PrimEnd flag on a - * vertex. + /* The PSIZ slot can pack multiple varyings in different channels + * and emit_urb_slot() will produce a MOV instruction for each of + * them. 
Since we are writing to an array, that will translate to + * possibly multiple MOV instructions with an array destination and + * each will generate a scratch write with the same offset into + * scratch space (thus, each one overwriting the previous). This is + * not what we want. What we will do instead is emit PSIZ to a + * regular temporary register, then move that register into the + * array. This way we only have one instruction with an array + * destination and we only produce a single scratch write. */ - emit(OR(dst, this->first_vertex, - (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); - emit(MOV(dst_reg(this->first_vertex), 0u)); + dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type)); + emit_urb_slot(tmp, varying); + dst_reg dst(this->vertex_output); + dst.reladdr = ralloc(mem_ctx, src_reg); + memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); + vec4_instruction *inst = emit(MOV(dst, src_reg(tmp))); + inst->force_writemask_all = true; } + emit(ADD(dst_reg(this->vertex_output_offset), this->vertex_output_offset, 1u)); } + + /* Now buffer flags for this vertex */ + dst_reg dst(this->vertex_output); + dst.reladdr = ralloc(mem_ctx, src_reg); + memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); + if (c->gp->program.OutputType == GL_POINTS) { + /* If we are outputting points, then every vertex has PrimStart and + * PrimEnd set. + */ + emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | + URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)); + emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + } else { + /* Otherwise, we can only set the PrimStart flag, which we have stored + * in the first_vertex register. We will have to wait until we execute + * EndPrimitive() or we end the thread to set the PrimEnd flag on a + * vertex.
+ */ + emit(OR(dst, this->first_vertex, + (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); + emit(MOV(dst_reg(this->first_vertex), 0u)); + } + emit(ADD(dst_reg(this->vertex_output_offset), + this->vertex_output_offset, 1u)); } void -- cgit v1.1