From 7998e37774c9589b456d9a951d47db98c16d6202 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Mon, 17 Oct 2016 10:11:55 -0700
Subject: anv/cmd_buffer: Move descriptor flushing into genX_cmd_buffer.c

It really should have gone here all along. We were trying a bit too hard
to make it gen-agnostic just because it didn't have any #if's.

Signed-off-by: Jason Ekstrand
Reviewed-by: Anuj Phogat
---
 src/intel/vulkan/genX_cmd_buffer.c | 362 +++++++++++++++++++++++++++++++++----
 1 file changed, 327 insertions(+), 35 deletions(-)

(limited to 'src/intel/vulkan/genX_cmd_buffer.c')

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index a8ecbd2..24e0012 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -629,6 +629,330 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
 }
 
 static void
+add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
+                        struct anv_state state, struct anv_bo *bo,
+                        uint32_t offset)
+{
+   /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and
+    * 9 for gen8+. We only write the first dword for gen8+ here and rely on
+    * the initial state to set the high bits to 0. */
+
+   const uint32_t dword = GEN_GEN < 8 ? 1 : 8;
+
+   anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+                      state.offset + dword * 4, bo, offset);
+}
+
+static struct anv_state
+alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer,
+                         struct anv_framebuffer *fb)
+{
+   struct anv_state state =
+      anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+
+   struct GENX(RENDER_SURFACE_STATE) null_ss = {
+      .SurfaceType = SURFTYPE_NULL,
+      .SurfaceArray = fb->layers > 0,
+      .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
+#if GEN_GEN >= 8
+      .TileMode = YMAJOR,
+#else
+      .TiledSurface = true,
+#endif
+      .Width = fb->width - 1,
+      .Height = fb->height - 1,
+      .Depth = fb->layers - 1,
+      .RenderTargetViewExtent = fb->layers - 1,
+   };
+
+   GENX(RENDER_SURFACE_STATE_pack)(NULL, state.map, &null_ss);
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
+   return state;
+}
+
+
+static VkResult
+emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+                   gl_shader_stage stage,
+                   struct anv_state *bt_state)
+{
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+   struct anv_subpass *subpass = cmd_buffer->state.subpass;
+   struct anv_pipeline *pipeline;
+   uint32_t bias, state_offset;
+
+   switch (stage) {
+   case MESA_SHADER_COMPUTE:
+      pipeline = cmd_buffer->state.compute_pipeline;
+      bias = 1;
+      break;
+   default:
+      pipeline = cmd_buffer->state.pipeline;
+      bias = 0;
+      break;
+   }
+
+   if (!anv_pipeline_has_stage(pipeline, stage)) {
+      *bt_state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
+   if (bias + map->surface_count == 0) {
+      *bt_state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer,
+                                                  bias + map->surface_count,
+                                                  &state_offset);
+   uint32_t *bt_map = bt_state->map;
+
+   if (bt_state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   if (stage == MESA_SHADER_COMPUTE &&
+       get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) {
+      struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
+      uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
+
+      struct anv_state surface_state;
+      surface_state =
+         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+      const enum isl_format format =
+         anv_isl_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+      anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+                                    format, bo_offset, 12, 1);
+
+      bt_map[0] = surface_state.offset + state_offset;
+      add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
+   }
+
+   if (map->surface_count == 0)
+      goto out;
+
+   if (map->image_count > 0) {
+      VkResult result =
+         anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
+      if (result != VK_SUCCESS)
+         return result;
+
+      cmd_buffer->state.push_constants_dirty |= 1 << stage;
+   }
+
+   uint32_t image = 0;
+   for (uint32_t s = 0; s < map->surface_count; s++) {
+      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
+
+      struct anv_state surface_state;
+      struct anv_bo *bo;
+      uint32_t bo_offset;
+
+      if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
+         /* Color attachment binding */
+         assert(stage == MESA_SHADER_FRAGMENT);
+         assert(binding->binding == 0);
+         if (binding->index < subpass->color_count) {
+            const struct anv_image_view *iview =
+               fb->attachments[subpass->color_attachments[binding->index]];
+
+            assert(iview->color_rt_surface_state.alloc_size);
+            surface_state = iview->color_rt_surface_state;
+            add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state,
+                                    iview->bo, iview->offset);
+         } else {
+            /* Null render target */
+            struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+            surface_state = alloc_null_surface_state(cmd_buffer, fb);
+         }
+
+         bt_map[bias + s] = surface_state.offset + state_offset;
+         continue;
+      }
+
+      struct anv_descriptor_set *set =
+         cmd_buffer->state.descriptors[binding->set];
+      uint32_t offset = set->layout->binding[binding->binding].descriptor_index;
+      struct anv_descriptor *desc = &set->descriptors[offset + binding->index];
+
+      switch (desc->type) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         /* Nothing for us to do here */
+         continue;
+
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         surface_state = desc->image_view->sampler_surface_state;
+         assert(surface_state.alloc_size);
+         bo = desc->image_view->bo;
+         bo_offset = desc->image_view->offset;
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+         surface_state = desc->image_view->storage_surface_state;
+         assert(surface_state.alloc_size);
+         bo = desc->image_view->bo;
+         bo_offset = desc->image_view->offset;
+
+         struct brw_image_param *image_param =
+            &cmd_buffer->state.push_constants[stage]->images[image++];
+
+         *image_param = desc->image_view->storage_image_param;
+         image_param->surface_idx = bias + s;
+         break;
+      }
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+         surface_state = desc->buffer_view->surface_state;
+         assert(surface_state.alloc_size);
+         bo = desc->buffer_view->bo;
+         bo_offset = desc->buffer_view->offset;
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         surface_state = desc->buffer_view->storage_surface_state;
+         assert(surface_state.alloc_size);
+         bo = desc->buffer_view->bo;
+         bo_offset = desc->buffer_view->offset;
+
+         struct brw_image_param *image_param =
+            &cmd_buffer->state.push_constants[stage]->images[image++];
+
+         *image_param = desc->buffer_view->storage_image_param;
+         image_param->surface_idx = bias + s;
+         break;
+
+      default:
+         assert(!"Invalid descriptor type");
+         continue;
+      }
+
+      bt_map[bias + s] = surface_state.offset + state_offset;
+      add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
+   }
+   assert(image == map->image_count);
+
+ out:
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*bt_state);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+              gl_shader_stage stage,
+              struct anv_state *state)
+{
+   struct anv_pipeline *pipeline;
+
+   if (stage == MESA_SHADER_COMPUTE)
+      pipeline = cmd_buffer->state.compute_pipeline;
+   else
+      pipeline = cmd_buffer->state.pipeline;
+
+   if (!anv_pipeline_has_stage(pipeline, stage)) {
+      *state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
+   if (map->sampler_count == 0) {
+      *state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   uint32_t size = map->sampler_count * 16;
+   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);
+
+   if (state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   for (uint32_t s = 0; s < map->sampler_count; s++) {
+      struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s];
+      struct anv_descriptor_set *set =
+         cmd_buffer->state.descriptors[binding->set];
+      uint32_t offset = set->layout->binding[binding->binding].descriptor_index;
+      struct anv_descriptor *desc = &set->descriptors[offset + binding->index];
+
+      if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+          desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+         continue;
+
+      struct anv_sampler *sampler = desc->sampler;
+
+      /* This can happen if we have an unfilled slot since TYPE_SAMPLER
+       * happens to be zero.
+       */
+      if (sampler == NULL)
+         continue;
+
+      memcpy(state->map + (s * 16),
+             sampler->state, sizeof(sampler->state));
+   }
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*state);
+
+   return VK_SUCCESS;
+}
+
+static uint32_t
+flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
+{
+   VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
+                              cmd_buffer->state.pipeline->active_stages;
+
+   VkResult result = VK_SUCCESS;
+   anv_foreach_stage(s, dirty) {
+      result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+      if (result != VK_SUCCESS)
+         break;
+      result = emit_binding_table(cmd_buffer, s,
+                                  &cmd_buffer->state.binding_tables[s]);
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   if (result != VK_SUCCESS) {
+      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+      result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+      assert(result == VK_SUCCESS);
+
+      /* Re-emit state base addresses so we get the new surface state base
+       * address before we start emitting binding tables etc.
+       */
+      genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
+
+      /* Re-emit all active binding tables */
+      dirty |= cmd_buffer->state.pipeline->active_stages;
+      anv_foreach_stage(s, dirty) {
+         result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+         if (result != VK_SUCCESS)
+            return result;
+         result = emit_binding_table(cmd_buffer, s,
+                                     &cmd_buffer->state.binding_tables[s]);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   }
+
+   cmd_buffer->state.descriptors_dirty &= ~dirty;
+
+   return dirty;
+}
+
+static void
 cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
                                     uint32_t stages)
 {
@@ -819,7 +1143,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
     */
    uint32_t dirty = 0;
    if (cmd_buffer->state.descriptors_dirty)
-      dirty = anv_cmd_buffer_flush_descriptor_sets(cmd_buffer);
+      dirty = flush_descriptor_sets(cmd_buffer);
 
    if (cmd_buffer->state.push_constants_dirty) {
 #if GEN_GEN >= 9
@@ -1030,12 +1354,10 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    struct anv_state surfaces = { 0, }, samplers = { 0, };
    VkResult result;
 
-   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
-                                         MESA_SHADER_COMPUTE, &samplers);
+   result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
    if (result != VK_SUCCESS)
       return result;
-   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
-                                              MESA_SHADER_COMPUTE, &surfaces);
+   result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
    if (result != VK_SUCCESS)
       return result;
 
@@ -1350,36 +1672,6 @@ genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer)
    }
 }
 
-struct anv_state
-genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
-                                          struct anv_framebuffer *fb)
-{
-   struct anv_state state =
-      anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
-
-   struct GENX(RENDER_SURFACE_STATE) null_ss = {
-      .SurfaceType = SURFTYPE_NULL,
-      .SurfaceArray = fb->layers > 0,
-      .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
-#if GEN_GEN >= 8
-      .TileMode = YMAJOR,
-#else
-      .TiledSurface = true,
-#endif
-      .Width = fb->width - 1,
-      .Height = fb->height - 1,
-      .Depth = fb->layers - 1,
-      .RenderTargetViewExtent = fb->layers - 1,
-   };
-
-   GENX(RENDER_SURFACE_STATE_pack)(NULL, state.map, &null_ss);
-
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_clflush(state);
-
-   return state;
-}
-
 static void
 cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 {
-- 
cgit v1.1