summaryrefslogtreecommitdiffstats
path: root/src/intel/vulkan/anv_cmd_buffer.c
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2016-05-27 00:53:27 -0700
committerJordan Justen <jordan.l.justen@intel.com>2016-06-01 19:29:02 -0700
commit3ba9594f32239031ddeff764e9896d48d05125d0 (patch)
tree8f9acf25ab2c8400770afdd581e2f59960fe1808 /src/intel/vulkan/anv_cmd_buffer.c
parent30685392e0e477771e6c6ba232a63df6b0e2ed83 (diff)
downloadexternal_mesa3d-3ba9594f32239031ddeff764e9896d48d05125d0.zip
external_mesa3d-3ba9594f32239031ddeff764e9896d48d05125d0.tar.gz
external_mesa3d-3ba9594f32239031ddeff764e9896d48d05125d0.tar.bz2
anv: Support new local ID generation & cross-thread constants
Cross-thread constant support appears on Haswell. It allows us to upload one set of uniform data for all threads without duplicating it per thread. We also support per-thread data, which allows us to store a per-thread ID in one of the uniforms; that ID can be used to calculate the gl_LocalInvocationIndex and gl_LocalInvocationID variables. v4: Support the old local ID push constant layout as well (Jason). Cc: "12.0" <mesa-stable@lists.freedesktop.org> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Diffstat (limited to 'src/intel/vulkan/anv_cmd_buffer.c')
-rw-r--r--src/intel/vulkan/anv_cmd_buffer.c54
1 files changed, 30 insertions, 24 deletions
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 63d096c..edaaa3d 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1065,23 +1065,14 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
- const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
- const unsigned push_constant_data_size =
- (local_id_dwords + prog_data->nr_params) * 4;
- const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
- const unsigned param_aligned_count =
- reg_aligned_constant_size / sizeof(uint32_t);
-
/* If we don't actually have any push constants, bail. */
- if (reg_aligned_constant_size == 0)
+ if (cs_prog_data->push.total.size == 0)
return (struct anv_state) { .offset = 0 };
- const unsigned total_push_constants_size =
- reg_aligned_constant_size * cs_prog_data->threads;
const unsigned push_constant_alignment =
cmd_buffer->device->info.gen < 8 ? 32 : 64;
const unsigned aligned_total_push_constants_size =
- ALIGN(total_push_constants_size, push_constant_alignment);
+ ALIGN(cs_prog_data->push.total.size, push_constant_alignment);
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
aligned_total_push_constants_size,
@@ -1090,21 +1081,36 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
/* Walk through the param array and fill the buffer with data */
uint32_t *u32_map = state.map;
- brw_cs_fill_local_id_payload(cs_prog_data, u32_map, cs_prog_data->threads,
- reg_aligned_constant_size);
-
- /* Setup uniform data for the first thread */
- for (unsigned i = 0; i < prog_data->nr_params; i++) {
- uint32_t offset = (uintptr_t)prog_data->param[i];
- u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset);
+ if (cs_prog_data->push.cross_thread.size > 0) {
+ assert(cs_prog_data->thread_local_id_index < 0 ||
+ cs_prog_data->thread_local_id_index >=
+ cs_prog_data->push.cross_thread.dwords);
+ for (unsigned i = 0;
+ i < cs_prog_data->push.cross_thread.dwords;
+ i++) {
+ uint32_t offset = (uintptr_t)prog_data->param[i];
+ u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
+ }
}
- /* Copy uniform data from the first thread to every other thread */
- const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t);
- for (unsigned t = 1; t < cs_prog_data->threads; t++) {
- memcpy(&u32_map[t * param_aligned_count + local_id_dwords],
- &u32_map[local_id_dwords],
- uniform_data_size);
+ if (cs_prog_data->push.per_thread.size > 0) {
+ brw_cs_fill_local_id_payload(cs_prog_data, u32_map, cs_prog_data->threads,
+ cs_prog_data->push.per_thread.size);
+ for (unsigned t = 0; t < cs_prog_data->threads; t++) {
+ unsigned dst =
+ 8 * (cs_prog_data->push.per_thread.regs * t +
+ cs_prog_data->push.cross_thread.regs +
+ cs_prog_data->local_invocation_id_regs);
+ unsigned src = cs_prog_data->push.cross_thread.dwords;
+ for ( ; src < prog_data->nr_params; src++, dst++) {
+ if (src != cs_prog_data->thread_local_id_index) {
+ uint32_t offset = (uintptr_t)prog_data->param[src];
+ u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset);
+ } else {
+ u32_map[dst] = t * cs_prog_data->simd_size;
+ }
+ }
+ }
}
if (!cmd_buffer->device->info.has_llc)