summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2016-07-21 21:47:45 -0700
committer: Francisco Jerez <currojerez@riseup.net> 2016-08-25 18:36:06 -0700
commit: b00a236d6a6212323f77248ba923c65eeb02592b (patch)
tree: 9cb1f6d396ef5c78d8c613d0f72f20cffaade4a0 /src/mesa/drivers/dri/i965/brw_fs_nir.cpp
parent: 7dac8820730777756c00d7024330517848dc3b9f (diff)
download: external_mesa3d-b00a236d6a6212323f77248ba923c65eeb02592b.zip
external_mesa3d-b00a236d6a6212323f77248ba923c65eeb02592b.tar.gz
external_mesa3d-b00a236d6a6212323f77248ba923c65eeb02592b.tar.bz2
i965/fs: Allocate fragment output temporaries on demand.
This gets rid of the duplication of logic between nir_setup_outputs() and get_frag_output() by allocating fragment output temporaries lazily whenever get_frag_output() is called. This makes nir_setup_outputs() a no-op for the fragment shader stage.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_nir.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 73
1 files changed, 27 insertions, 46 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5910d8c..07db5d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -81,11 +81,9 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
void
fs_visitor::nir_setup_outputs()
{
- if (stage == MESA_SHADER_TESS_CTRL)
+ if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
return;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
nir_foreach_variable(var, &nir->outputs) {
@@ -98,41 +96,6 @@ fs_visitor::nir_setup_outputs()
nir_setup_single_output_varying(&reg, var->type, &location);
break;
}
- case MESA_SHADER_FRAGMENT: {
- const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
- type_size_vec4_times_4(var->type));
-
- if (key->force_dual_color_blend &&
- var->data.location == FRAG_RESULT_DATA1) {
- this->dual_src_output = reg;
- } else if (var->data.index > 0) {
- assert(var->data.location == FRAG_RESULT_DATA0);
- assert(var->data.index == 1);
- this->dual_src_output = reg;
- } else if (var->data.location == FRAG_RESULT_COLOR) {
- /* Writing gl_FragColor outputs to all color regions. */
- for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
- this->outputs[i] = reg;
- }
- } else if (var->data.location == FRAG_RESULT_DEPTH) {
- this->frag_depth = reg;
- } else if (var->data.location == FRAG_RESULT_STENCIL) {
- this->frag_stencil = reg;
- } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
- this->sample_mask = reg;
- } else {
- /* gl_FragData or a user-defined FS output */
- assert(var->data.location >= FRAG_RESULT_DATA0 &&
- var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
-
- /* General color output. */
- for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
- int output = var->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = offset(reg, bld, 4 * i);
- }
- }
- break;
- }
default:
unreachable("unhandled shader stage");
}
@@ -3251,7 +3214,23 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
}
static fs_reg
-get_frag_output(const fs_visitor *v, unsigned location)
+alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n)
+{
+ if (n && regs[0].file != BAD_FILE) {
+ return regs[0];
+
+ } else {
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, size);
+
+ for (unsigned i = 0; i < n; i++)
+ regs[i] = tmp;
+
+ return tmp;
+ }
+}
+
+static fs_reg
+alloc_frag_output(fs_visitor *v, unsigned location)
{
assert(v->stage == MESA_SHADER_FRAGMENT);
const brw_wm_prog_key *const key =
@@ -3260,23 +3239,25 @@ get_frag_output(const fs_visitor *v, unsigned location)
const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
- return v->dual_src_output;
+ return alloc_temporary(v->bld, 4, &v->dual_src_output, 1);
else if (l == FRAG_RESULT_COLOR)
- return v->outputs[0];
+ return alloc_temporary(v->bld, 4, v->outputs,
+ MAX2(key->nr_color_regions, 1));
else if (l == FRAG_RESULT_DEPTH)
- return v->frag_depth;
+ return alloc_temporary(v->bld, 1, &v->frag_depth, 1);
else if (l == FRAG_RESULT_STENCIL)
- return v->frag_stencil;
+ return alloc_temporary(v->bld, 1, &v->frag_stencil, 1);
else if (l == FRAG_RESULT_SAMPLE_MASK)
- return v->sample_mask;
+ return alloc_temporary(v->bld, 1, &v->sample_mask, 1);
else if (l >= FRAG_RESULT_DATA0 &&
l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
- return v->outputs[l - FRAG_RESULT_DATA0];
+ return alloc_temporary(v->bld, 4,
+ &v->outputs[l - FRAG_RESULT_DATA0], 1);
else
unreachable("Invalid location");
@@ -3324,7 +3305,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
assert(const_offset && "Indirect output stores not allowed");
const unsigned location = nir_intrinsic_base(instr) +
SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
- const fs_reg new_dest = retype(get_frag_output(this, location),
+ const fs_reg new_dest = retype(alloc_frag_output(this, location),
src.type);
for (unsigned j = 0; j < instr->num_components; j++)