summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2016-10-12 22:41:09 -0700
committerEmil Velikov <emil.l.velikov@gmail.com>2016-10-27 11:07:08 +0100
commitf17450ff7eaab3ebe01aee2d26b886c4560a40b3 (patch)
treed51ebb8e5b7aa2860019e678c3a68b32a3075c60
parentde826a10a703f1c2351cff48ab816474f34e9ded (diff)
downloadexternal_mesa3d-f17450ff7eaab3ebe01aee2d26b886c4560a40b3.zip
external_mesa3d-f17450ff7eaab3ebe01aee2d26b886c4560a40b3.tar.gz
external_mesa3d-f17450ff7eaab3ebe01aee2d26b886c4560a40b3.tar.bz2
i965: Don't use nir_assign_var_locations for VS/TES/GS outputs.
Fixes spec/arb_enhanced_layouts/execution/component-layout/vs-fs-array-dvec3. v2: Remove nir_outputs field from fs_visitor (caught by Tim and Iago). Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com> (cherry picked from commit 59864e8e02057cc6fa0448a8af067a3cf53389da)
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp13
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp46
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.h1
5 files changed, 12 insertions, 62 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7738a47..921cc00 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -492,19 +492,6 @@ type_size_scalar(const struct glsl_type *type)
return 0;
}
-/**
- * Returns the number of scalar components needed to store type, assuming
- * that vectors are padded out to vec4.
- *
- * This has the packing rules of type_size_vec4(), but counts components
- * similar to type_size_scalar().
- */
-extern "C" int
-type_size_vec4_times_4(const struct glsl_type *type)
-{
- return 4 * type_size_vec4(type);
-}
-
/* Attribute arrays are loaded as one vec4 per element (or matrix column),
* except for double-precision types, which are loaded as one dvec4.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 663b3ea..aad2160 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -325,7 +325,6 @@ public:
fs_reg *nir_locals;
fs_reg *nir_ssa_values;
fs_reg nir_inputs;
- fs_reg nir_outputs;
fs_reg *nir_system_values;
bool failed;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9cad1a6..4baadc9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -49,47 +49,18 @@ fs_visitor::emit_nir_code()
}
void
-fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
- const glsl_type *type,
- unsigned *location)
-{
- if (type->is_array() || type->is_matrix()) {
- const struct glsl_type *elem_type = glsl_get_array_element(type);
- const unsigned length = glsl_get_length(type);
-
- for (unsigned i = 0; i < length; i++) {
- nir_setup_single_output_varying(reg, elem_type, location);
- }
- } else if (type->is_record()) {
- for (unsigned i = 0; i < type->length; i++) {
- const struct glsl_type *field_type = type->fields.structure[i].type;
- nir_setup_single_output_varying(reg, field_type, location);
- }
- } else {
- assert(type->is_scalar() || type->is_vector());
- unsigned num_iter = 1;
- if (type->is_dual_slot())
- num_iter = 2;
- for (unsigned count = 0; count < num_iter; count++) {
- this->outputs[*location] = *reg;
- *reg = offset(*reg, bld, 4);
- (*location)++;
- }
- }
-}
-
-void
fs_visitor::nir_setup_outputs()
{
if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
return;
- nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
-
nir_foreach_variable(var, &nir->outputs) {
- fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
- unsigned location = var->data.location;
- nir_setup_single_output_varying(&reg, var->type, &location);
+ const unsigned vec4s = type_size_vec4(var->type);
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s);
+ for (unsigned i = 0; i < vec4s; i++) {
+ if (outputs[var->data.driver_location + i].file == BAD_FILE)
+ outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i);
+ }
}
}
@@ -4242,12 +4213,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
- fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
- instr->const_index[0]);
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "Indirect output stores not allowed");
- new_dest = offset(new_dest, bld, const_offset->u32[0]);
+ fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld,
+ 4 * const_offset->u32[0]), src.type);
unsigned num_components = instr->num_components;
unsigned first_component = nir_intrinsic_component(instr);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index a935f42..de0e235 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -327,16 +327,11 @@ void
brw_nir_lower_vue_outputs(nir_shader *nir,
bool is_scalar)
{
- if (is_scalar) {
- nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
- VARYING_SLOT_VAR0,
- type_size_vec4_times_4);
- nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4, 0);
- } else {
- nir_foreach_variable(var, &nir->outputs)
- var->data.driver_location = var->data.location;
- nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
+ nir_foreach_variable(var, &nir->outputs) {
+ var->data.driver_location = var->data.location;
}
+
+ nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index aef5c53..645a818 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -34,7 +34,6 @@ extern "C" {
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
int type_size_dvec4(const struct glsl_type *type);
-int type_size_vec4_times_4(const struct glsl_type *type);
int type_size_vs_input(const struct glsl_type *type);
static inline int