summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs.h
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2015-11-14 17:40:43 -0800
committerKenneth Graunke <kenneth@whitecape.org>2016-05-03 16:28:00 -0700
commit7d9143ad885752184156b3a0d3e492aef09af3b0 (patch)
tree78ddc0d7b4aa2bb44439ebf2401e61c93076b2b4 /src/mesa/drivers/dri/i965/brw_fs.h
parent75881bed9e12cee1abfb5ec466e17ff2b253e0ab (diff)
downloadexternal_mesa3d-7d9143ad885752184156b3a0d3e492aef09af3b0.zip
external_mesa3d-7d9143ad885752184156b3a0d3e492aef09af3b0.tar.gz
external_mesa3d-7d9143ad885752184156b3a0d3e492aef09af3b0.tar.bz2
i965: Write a scalar TCS backend that runs in SINGLE_PATCH mode.
Unlike most shader stages, the Hull Shader hardware makes us explicitly tell it how many threads to dispatch and manually configure the channel mask. One perk of this is that we have a lot of flexibility - we can run it in either SIMD4x2 or SIMD8 mode. Treating it as SIMD8 means that shaders with 8 or fewer output vertices (which is overwhemingly the common case) can be handled by a single thread. This has several intriguing properties: - Accessing input arrays with gl_InvocationID as the index is a simple SIMD8 URB read with g1 as the header. No indirect addressing required. - Barriers are no-ops. - We could potentially do output shadowing to combine writes, as the concurrency concerns are gone. (We don't do this yet, though.) v2: Drop first_non_payload_grf change, as it was always adding 0 (caught by Jordan Justen). Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.h')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h5
1 files changed, 5 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index a5c3297..ba6bd3f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -110,6 +110,7 @@ public:
bool run_fs(bool do_rep_send);
bool run_vs(gl_clip_plane *clip_planes);
+ bool run_tcs_single_patch();
bool run_tes();
bool run_gs();
bool run_cs();
@@ -126,6 +127,7 @@ public:
void assign_urb_setup();
void convert_attr_sources_to_hw_regs(fs_inst *inst);
void assign_vs_urb_setup();
+ void assign_tcs_single_patch_urb_setup();
void assign_tes_urb_setup();
void assign_gs_urb_setup();
bool assign_regs(bool allow_spilling);
@@ -250,6 +252,8 @@ public:
nir_ssa_undef_instr *instr);
void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
@@ -405,6 +409,7 @@ public:
fs_reg userplane[MAX_CLIP_PLANES];
fs_reg final_gs_vertex_count;
fs_reg control_data_bits;
+ fs_reg invocation_id;
unsigned grf_used;
bool spilled_any_registers;