From 918bda23dda36004c95f6441328ecc892e068886 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 4 Nov 2015 23:05:07 -0800 Subject: i965: Split nir_emit_intrinsic by stage with a general fallback. Many intrinsics only apply to a particular stage (such as discard). In other cases, we may want to interpret them differently based on the stage (such as load_primitive_id or load_input). The current method isn't that pretty - we handle all intrinsics in one giant function. Sometimes we assert on stage, sometimes we forget. Different behaviors are handled via if-ladders based on stage. This commit introduces new nir_emit__intrinsic() functions, and makes nir_emit_instr() call those. In turn, those fall back to the generic nir_emit_intrinsic() function for cases they don't want to handle specially. This makes it clear which intrinsics only exist in one stage, and makes it easy to handle inputs/outputs differently for various stages. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.h | 8 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 788 +++++++++++++++++-------------- 2 files changed, 450 insertions(+), 346 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2dfcab1..8a93b56 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -263,6 +263,14 @@ public: nir_load_const_instr *instr); void nir_emit_undef(const brw::fs_builder &bld, nir_ssa_undef_instr *instr); + void nir_emit_vs_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_gs_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_fs_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_cs_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 02b9f5b..52d5ad1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -360,7 +360,22 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_intrinsic: - nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr)); + switch (stage) { + case MESA_SHADER_VERTEX: + nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; + case MESA_SHADER_GEOMETRY: + nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; + case MESA_SHADER_FRAGMENT: + nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; + case MESA_SHADER_COMPUTE: + nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; + default: + unreachable("unsupported shader stage"); + } break; case nir_instr_type_tex: @@ -1568,15 +1583,128 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, } void -fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) +fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) { + assert(stage == MESA_SHADER_VERTEX); + fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - bool has_indirect = false; + switch (instr->intrinsic) { + case nir_intrinsic_load_vertex_id: + unreachable("should be lowered by lower_vertex_id()"); + + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_instance_id: { + gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); + fs_reg val = nir_system_values[sv]; + assert(val.file != BAD_FILE); + dest.type = val.type; + bld.MOV(dest, val); + break; + } + + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_GEOMETRY); + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + switch (instr->intrinsic) { + case nir_intrinsic_load_primitive_id: + assert(stage == MESA_SHADER_GEOMETRY); + assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), + retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); + break; + + case nir_intrinsic_load_input_indirect: + case nir_intrinsic_load_input: + unreachable("load_input intrinsics are invalid for the GS stage"); + + case nir_intrinsic_load_per_vertex_input_indirect: + assert(!"Not allowed"); + case nir_intrinsic_load_per_vertex_input: + emit_gs_input_load(dest, instr->src[0], instr->const_index[0], + instr->num_components); + break; + + case nir_intrinsic_emit_vertex_with_counter: + emit_gs_vertex(instr->src[0], instr->const_index[0]); + break; + + case nir_intrinsic_end_primitive_with_counter: + emit_gs_end_primitive(instr->src[0]); + break; + + case nir_intrinsic_set_vertex_count: + bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0])); + break; + + case nir_intrinsic_load_invocation_id: { + fs_reg val = nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; + assert(val.file != BAD_FILE); + dest.type = val.type; + bld.MOV(dest, val); + break; + } + + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_FRAGMENT); + struct brw_wm_prog_data *wm_prog_data = + (struct brw_wm_prog_data *) prog_data; + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); switch (instr->intrinsic) { + case nir_intrinsic_load_front_face: + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), + *emit_frontfacing_interpolation()); + break; + + case nir_intrinsic_load_sample_pos: { + fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; + assert(sample_pos.file != BAD_FILE); + dest.type = sample_pos.type; + bld.MOV(dest, sample_pos); + bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1)); + break; + } + + case nir_intrinsic_load_sample_mask_in: + case nir_intrinsic_load_sample_id: { + gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); + fs_reg val = nir_system_values[sv]; + assert(val.file != BAD_FILE); + dest.type = val.type; + bld.MOV(dest, val); + break; + } + case nir_intrinsic_discard: case nir_intrinsic_discard_if: { /* We track our discarded pixels in f0.1. By predicating on it, we can @@ -1602,90 +1730,332 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_atomic_counter_inc: - case nir_intrinsic_atomic_counter_dec: - case nir_intrinsic_atomic_counter_read: { - using namespace surface_access; + case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_var_at_offset: { + /* Handle ARB_gpu_shader5 interpolation intrinsics + * + * It's worth a quick word of explanation as to why we handle the full + * variable-based interpolation intrinsic rather than a lowered version + * with like we do for other inputs. We have to do that because the way + * we set up inputs doesn't allow us to use the already setup inputs for + * interpolation. At the beginning of the shader, we go through all of + * the input variables and do the initial interpolation and put it in + * the nir_inputs array based on its location as determined in + * nir_lower_io. If the input isn't used, dead code cleans up and + * everything works fine. However, when we get to the ARB_gpu_shader5 + * interpolation intrinsics, we need to reinterpolate the input + * differently. If we used an intrinsic that just had an index it would + * only give us the offset into the nir_inputs array. However, this is + * useless because that value is post-interpolation and we need + * pre-interpolation. In order to get the actual location of the bits + * we get from the vertex fetching hardware, we need the variable. + */ + wm_prog_data->pulls_bary = true; - /* Get the arguments of the atomic intrinsic. */ - const fs_reg offset = get_nir_src(instr->src[0]); - const unsigned surface = (stage_prog_data->binding_table.abo_start + - instr->const_index[0]); - fs_reg tmp; + fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); + const glsl_interp_qualifier interpolation = + (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation; - /* Emit a surface read or atomic op. */ switch (instr->intrinsic) { - case nir_intrinsic_atomic_counter_read: - tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1); - break; - - case nir_intrinsic_atomic_counter_inc: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), - fs_reg(), 1, 1, BRW_AOP_INC); + case nir_intrinsic_interp_var_at_centroid: + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_CENTROID, + dst_xy, + fs_reg(), /* src */ + fs_reg(0u), + interpolation); break; - case nir_intrinsic_atomic_counter_dec: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), - fs_reg(), 1, 1, BRW_AOP_PREDEC); - break; + case nir_intrinsic_interp_var_at_sample: { + nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - default: - unreachable("Unreachable"); - } + if (const_sample) { + unsigned msg_data = const_sample->i[0] << 4; - /* Assign the result. */ - bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + fs_reg(msg_data), + interpolation); + } else { + const fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - /* Mark the surface as used. */ - brw_mark_surface_used(stage_prog_data, surface); - break; - } + if (nir_src_is_dynamically_uniform(instr->src[0])) { + const fs_reg sample_id = bld.emit_uniformize(sample_src); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + } else { + /* Make a loop that sends a message to the pixel interpolater + * for the sample number in each live channel. If there are + * multiple channels with the same sample number then these + * will be handled simultaneously with a single interation of + * the loop. + */ + bld.emit(BRW_OPCODE_DO); - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: { - using namespace image_access; + /* Get the next live sample number into sample_id_reg */ + const fs_reg sample_id = bld.emit_uniformize(sample_src); - /* Get the referenced image variable and type. */ - const nir_variable *var = instr->variables[0]->var; - const glsl_type *type = var->type->without_array(); - const brw_reg_type base_type = get_image_base_type(type); + /* Set the flag register so that we can perform the send + * message on all channels that have the same sample number + */ + bld.CMP(bld.null_reg_ud(), + sample_src, sample_id, + BRW_CONDITIONAL_EQ); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + fs_inst *inst = + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + set_predicate(BRW_PREDICATE_NORMAL, inst); - /* Get some metadata from the image intrinsic. */ - const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - const unsigned arr_dims = type->sampler_array ? 1 : 0; - const unsigned surf_dims = type->coordinate_components() - arr_dims; - const mesa_format format = - (var->data.image.write_only ? MESA_FORMAT_NONE : - _mesa_get_shader_image_format(var->data.image.format)); + /* Continue the loop if there are any live channels left */ + set_predicate_inv(BRW_PREDICATE_NORMAL, + true, /* inverse */ + bld.emit(BRW_OPCODE_WHILE)); + } + } - /* Get the arguments of the image intrinsic. */ - const fs_reg image = get_nir_image_deref(instr->variables[0]); - const fs_reg addr = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_UD); - const fs_reg src0 = (info->num_srcs >= 3 ? - retype(get_nir_src(instr->src[2]), base_type) : - fs_reg()); - const fs_reg src1 = (info->num_srcs >= 4 ? - retype(get_nir_src(instr->src[3]), base_type) : - fs_reg()); - fs_reg tmp; + break; + } - /* Emit an image load, store or atomic op. */ - if (instr->intrinsic == nir_intrinsic_image_load) - tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format); + case nir_intrinsic_interp_var_at_offset: { + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - else if (instr->intrinsic == nir_intrinsic_image_store) - emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format); + if (const_offset) { + unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; + unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - else + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, + dst_xy, + fs_reg(), /* src */ + fs_reg(off_x | (off_y << 4)), + interpolation); + } else { + fs_reg src = vgrf(glsl_type::ivec2_type); + fs_reg offset_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_F); + for (int i = 0; i < 2; i++) { + fs_reg temp = vgrf(glsl_type::float_type); + bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f)); + fs_reg itemp = vgrf(glsl_type::int_type); + bld.MOV(itemp, temp); /* float to int */ + + /* Clamp the upper end of the range to +7/16. + * ARB_gpu_shader5 requires that we support a maximum offset + * of +0.5, which isn't representable in a S0.4 value -- if + * we didn't clamp it, we'd end up with -8/16, which is the + * opposite of what the shader author wanted. + * + * This is legal due to ARB_gpu_shader5's quantization + * rules: + * + * "Not all values of may be supported; x and y + * offsets may be rounded to fixed-point values with the + * number of fraction bits given by the + * implementation-dependent constant + * FRAGMENT_INTERPOLATION_OFFSET_BITS" + */ + set_condmod(BRW_CONDITIONAL_L, + bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); + } + + const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; + emit_pixel_interpolater_send(bld, + opcode, + dst_xy, + src, + fs_reg(0u), + interpolation); + } + break; + } + + default: + unreachable("Invalid intrinsic"); + } + + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); + src.type = dest.type; + + bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); + dest = offset(dest, bld, 1); + } + break; + } + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_COMPUTE); + struct brw_cs_prog_data *cs_prog_data = + (struct brw_cs_prog_data *) prog_data; + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + switch (instr->intrinsic) { + case nir_intrinsic_barrier: + emit_barrier(); + cs_prog_data->uses_barrier = true; + break; + + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_work_group_id: { + gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); + fs_reg val = nir_system_values[sv]; + assert(val.file != BAD_FILE); + dest.type = val.type; + for (unsigned i = 0; i < 3; i++) + bld.MOV(offset(dest, bld, i), offset(val, bld, i)); + break; + } + + case nir_intrinsic_load_num_work_groups: { + const unsigned surface = + cs_prog_data->binding_table.work_groups_start; + + cs_prog_data->uses_num_work_groups = true; + + fs_reg surf_index = fs_reg(surface); + brw_mark_surface_used(prog_data, surface); + + /* Read the 3 GLuint components of gl_NumWorkGroups */ + for (unsigned i = 0; i < 3; i++) { + fs_reg read_result = + emit_untyped_read(bld, surf_index, + fs_reg(i << 2), + 1 /* dims */, 1 /* size */, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + bld.MOV(dest, read_result); + dest = offset(dest, bld, 1); + } + break; + } + + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) +{ + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + bool has_indirect = false; + + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_inc: + case nir_intrinsic_atomic_counter_dec: + case nir_intrinsic_atomic_counter_read: { + using namespace surface_access; + + /* Get the arguments of the atomic intrinsic. */ + const fs_reg offset = get_nir_src(instr->src[0]); + const unsigned surface = (stage_prog_data->binding_table.abo_start + + instr->const_index[0]); + fs_reg tmp; + + /* Emit a surface read or atomic op. */ + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_read: + tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1); + break; + + case nir_intrinsic_atomic_counter_inc: + tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + fs_reg(), 1, 1, BRW_AOP_INC); + break; + + case nir_intrinsic_atomic_counter_dec: + tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + fs_reg(), 1, 1, BRW_AOP_PREDEC); + break; + + default: + unreachable("Unreachable"); + } + + /* Assign the result. */ + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp); + + /* Mark the surface as used. */ + brw_mark_surface_used(stage_prog_data, surface); + break; + } + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + using namespace image_access; + + /* Get the referenced image variable and type. */ + const nir_variable *var = instr->variables[0]->var; + const glsl_type *type = var->type->without_array(); + const brw_reg_type base_type = get_image_base_type(type); + + /* Get some metadata from the image intrinsic. */ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + const unsigned arr_dims = type->sampler_array ? 1 : 0; + const unsigned surf_dims = type->coordinate_components() - arr_dims; + const mesa_format format = + (var->data.image.write_only ? MESA_FORMAT_NONE : + _mesa_get_shader_image_format(var->data.image.format)); + + /* Get the arguments of the image intrinsic. */ + const fs_reg image = get_nir_image_deref(instr->variables[0]); + const fs_reg addr = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + const fs_reg src0 = (info->num_srcs >= 3 ? + retype(get_nir_src(instr->src[2]), base_type) : + fs_reg()); + const fs_reg src1 = (info->num_srcs >= 4 ? + retype(get_nir_src(instr->src[3]), base_type) : + fs_reg()); + fs_reg tmp; + + /* Emit an image load, store or atomic op. */ + if (instr->intrinsic == nir_intrinsic_image_load) + tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format); + + else if (instr->intrinsic == nir_intrinsic_image_store) + emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format); + + else tmp = emit_image_atomic(bld, image, addr, src0, src1, surf_dims, arr_dims, info->dest_components, get_image_atomic_op(instr->intrinsic, type)); @@ -1789,44 +2159,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1)); break; - case nir_intrinsic_load_front_face: - bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), - *emit_frontfacing_interpolation()); - break; - - case nir_intrinsic_load_vertex_id: - unreachable("should be lowered by lower_vertex_id()"); - - case nir_intrinsic_load_primitive_id: - assert(stage == MESA_SHADER_GEOMETRY); - assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id); - bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), - retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); - break; - - case nir_intrinsic_load_vertex_id_zero_base: - case nir_intrinsic_load_base_vertex: - case nir_intrinsic_load_instance_id: - case nir_intrinsic_load_invocation_id: - case nir_intrinsic_load_sample_mask_in: - case nir_intrinsic_load_sample_id: { - gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); - fs_reg val = nir_system_values[sv]; - assert(val.file != BAD_FILE); - dest.type = val.type; - bld.MOV(dest, val); - break; - } - - case nir_intrinsic_load_sample_pos: { - fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; - assert(sample_pos.file != BAD_FILE); - dest.type = sample_pos.type; - bld.MOV(dest, sample_pos); - bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1)); - break; - } - case nir_intrinsic_load_uniform_indirect: has_indirect = true; /* fallthrough */ @@ -1980,185 +2312,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_per_vertex_input_indirect: - assert(!"Not allowed"); - /* fallthrough */ - case nir_intrinsic_load_per_vertex_input: - emit_gs_input_load(dest, instr->src[0], instr->const_index[0], - instr->num_components); - break; - - /* Handle ARB_gpu_shader5 interpolation intrinsics - * - * It's worth a quick word of explanation as to why we handle the full - * variable-based interpolation intrinsic rather than a lowered version - * with like we do for other inputs. We have to do that because the way - * we set up inputs doesn't allow us to use the already setup inputs for - * interpolation. At the beginning of the shader, we go through all of - * the input variables and do the initial interpolation and put it in - * the nir_inputs array based on its location as determined in - * nir_lower_io. If the input isn't used, dead code cleans up and - * everything works fine. However, when we get to the ARB_gpu_shader5 - * interpolation intrinsics, we need to reinterpolate the input - * differently. If we used an intrinsic that just had an index it would - * only give us the offset into the nir_inputs array. However, this is - * useless because that value is post-interpolation and we need - * pre-interpolation. In order to get the actual location of the bits - * we get from the vertex fetching hardware, we need the variable. - */ - case nir_intrinsic_interp_var_at_centroid: - case nir_intrinsic_interp_var_at_sample: - case nir_intrinsic_interp_var_at_offset: { - assert(stage == MESA_SHADER_FRAGMENT); - - ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true; - - fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - const glsl_interp_qualifier interpolation = - (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation; - - switch (instr->intrinsic) { - case nir_intrinsic_interp_var_at_centroid: - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_CENTROID, - dst_xy, - fs_reg(), /* src */ - fs_reg(0u), - interpolation); - break; - - case nir_intrinsic_interp_var_at_sample: { - nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - - if (const_sample) { - unsigned msg_data = const_sample->i[0] << 4; - - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, - fs_reg(), /* src */ - fs_reg(msg_data), - interpolation); - } else { - const fs_reg sample_src = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_UD); - - if (nir_src_is_dynamically_uniform(instr->src[0])) { - const fs_reg sample_id = bld.emit_uniformize(sample_src); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, - fs_reg(), /* src */ - msg_data, - interpolation); - } else { - /* Make a loop that sends a message to the pixel interpolater - * for the sample number in each live channel. If there are - * multiple channels with the same sample number then these - * will be handled simultaneously with a single interation of - * the loop. - */ - bld.emit(BRW_OPCODE_DO); - - /* Get the next live sample number into sample_id_reg */ - const fs_reg sample_id = bld.emit_uniformize(sample_src); - - /* Set the flag register so that we can perform the send - * message on all channels that have the same sample number - */ - bld.CMP(bld.null_reg_ud(), - sample_src, sample_id, - BRW_CONDITIONAL_EQ); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); - fs_inst *inst = - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, - fs_reg(), /* src */ - msg_data, - interpolation); - set_predicate(BRW_PREDICATE_NORMAL, inst); - - /* Continue the loop if there are any live channels left */ - set_predicate_inv(BRW_PREDICATE_NORMAL, - true, /* inverse */ - bld.emit(BRW_OPCODE_WHILE)); - } - } - - break; - } - - case nir_intrinsic_interp_var_at_offset: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - - if (const_offset) { - unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; - unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, - dst_xy, - fs_reg(), /* src */ - fs_reg(off_x | (off_y << 4)), - interpolation); - } else { - fs_reg src = vgrf(glsl_type::ivec2_type); - fs_reg offset_src = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_F); - for (int i = 0; i < 2; i++) { - fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f)); - fs_reg itemp = vgrf(glsl_type::int_type); - bld.MOV(itemp, temp); /* float to int */ - - /* Clamp the upper end of the range to +7/16. - * ARB_gpu_shader5 requires that we support a maximum offset - * of +0.5, which isn't representable in a S0.4 value -- if - * we didn't clamp it, we'd end up with -8/16, which is the - * opposite of what the shader author wanted. - * - * This is legal due to ARB_gpu_shader5's quantization - * rules: - * - * "Not all values of may be supported; x and y - * offsets may be rounded to fixed-point values with the - * number of fraction bits given by the - * implementation-dependent constant - * FRAGMENT_INTERPOLATION_OFFSET_BITS" - */ - set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); - } - - const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; - emit_pixel_interpolater_send(bld, - opcode, - dst_xy, - src, - fs_reg(0u), - interpolation); - } - break; - } - - default: - unreachable("Invalid intrinsic"); - } - - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); - src.type = dest.type; - - bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); - dest = offset(dest, bld, 1); - } - break; - } - case nir_intrinsic_store_ssbo_indirect: has_indirect = true; /* fallthrough */ @@ -2240,23 +2393,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_barrier: - emit_barrier(); - if (stage == MESA_SHADER_COMPUTE) - ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true; - break; - - case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_work_group_id: { - gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); - fs_reg val = nir_system_values[sv]; - assert(val.file != BAD_FILE); - dest.type = val.type; - for (unsigned i = 0; i < 3; i++) - bld.MOV(offset(dest, bld, i), offset(val, bld, i)); - break; - } - case nir_intrinsic_ssbo_atomic_add: nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr); break; @@ -2312,46 +2448,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_num_work_groups: { - assert(devinfo->gen >= 7); - assert(stage == MESA_SHADER_COMPUTE); - - struct brw_cs_prog_data *cs_prog_data = - (struct brw_cs_prog_data *) prog_data; - const unsigned surface = - cs_prog_data->binding_table.work_groups_start; - - cs_prog_data->uses_num_work_groups = true; - - fs_reg surf_index = fs_reg(surface); - brw_mark_surface_used(prog_data, surface); - - /* Read the 3 GLuint components of gl_NumWorkGroups */ - for (unsigned i = 0; i < 3; i++) { - fs_reg read_result = - emit_untyped_read(bld, surf_index, - fs_reg(i << 2), - 1 /* dims */, 1 /* size */, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - bld.MOV(dest, read_result); - dest = offset(dest, bld, 1); - } - break; - } - - case nir_intrinsic_emit_vertex_with_counter: - emit_gs_vertex(instr->src[0], instr->const_index[0]); - break; - - case nir_intrinsic_end_primitive_with_counter: - emit_gs_end_primitive(instr->src[0]); - break; - - case nir_intrinsic_set_vertex_count: - bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0])); - break; - default: unreachable("unknown intrinsic"); } -- cgit v1.1