author | Chih-Wei Huang <cwhuang@linux.org.tw> | 2016-11-15 16:02:40 +0800
committer | Chih-Wei Huang <cwhuang@linux.org.tw> | 2016-11-16 10:46:46 +0800
commit | f43ac65d6166f73eb439391b463218d97c65cce9 (patch)
tree | 0d06ec98e48be80cd924d3f6647a3913f3686ce0 /src
parent | 1955a9ca8d71ba5eaff4073bdfff4dee76e1a73a (diff)
parent | f2f487ebbb808010528edd69000694bfe525f87b (diff)
Merge remote-tracking branch 'mesa/13.0' into nougat-x86
Diffstat (limited to 'src')
59 files changed, 1223 insertions, 752 deletions
diff --git a/src/amd/addrlib/addrtypes.h b/src/amd/addrlib/addrtypes.h index 4c68ac5..4dd7bab 100644 --- a/src/amd/addrlib/addrtypes.h +++ b/src/amd/addrlib/addrtypes.h @@ -88,7 +88,11 @@ typedef int INT; #ifndef ADDR_FASTCALL #if defined(__GNUC__) - #define ADDR_FASTCALL __attribute__((regparm(0))) + #if defined(__i386__) + #define ADDR_FASTCALL __attribute__((regparm(0))) + #else + #define ADDR_FASTCALL + #endif #else #define ADDR_FASTCALL __fastcall #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b757e8c..f235cc2 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2609,6 +2609,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx) ctx->voidt, NULL, 0, 0); } +static void emit_discard_if(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr) +{ + LLVMValueRef cond; + ctx->shader_info->fs.can_discard = true; + + cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, + get_src(ctx, instr->src[0]), + ctx->i32zero, ""); + + cond = LLVMBuildSelect(ctx->builder, cond, + LLVMConstReal(ctx->f32, -1.0f), + ctx->f32zero, ""); + emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill", + LLVMVoidTypeInContext(ctx->context), + &cond, 1, 0); +} + static LLVMValueRef visit_load_local_invocation_index(struct nir_to_llvm_context *ctx) { @@ -2921,6 +2939,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, LLVMVoidTypeInContext(ctx->context), NULL, 0, 0); break; + case nir_intrinsic_discard_if: + emit_discard_if(ctx, instr); + break; case nir_intrinsic_memory_barrier: emit_waitcnt(ctx); break; @@ -4352,12 +4373,10 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx, for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef values[4]; - bool last; + if (!(ctx->output_mask & (1ull << i))) continue; - last = ctx->output_mask <= ((1ull << (i + 1)) - 1); - if (i == FRAG_RESULT_DEPTH) { ctx->shader_info->fs.writes_z = true; depth = to_float(ctx, LLVMBuildLoad(ctx->builder, @@ -4367,10 +4386,14 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx, stencil = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else { + bool last = false; for (unsigned j = 0; j < 4; j++) values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil) + last = ctx->output_mask <= ((1ull << (i + 1)) - 1); + si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last); index++; } diff --git a/src/amd/vulkan/.gitignore b/src/amd/vulkan/.gitignore index e55e353..2a42d7f 100644 --- a/src/amd/vulkan/.gitignore +++ b/src/amd/vulkan/.gitignore @@ -4,3 +4,4 @@ /radv_timestamp.h /dev_icd.json /vk_format_table.c +/radeon_icd.*.json diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am index 44d2a66..c559a95 100644 --- a/src/amd/vulkan/Makefile.am +++ b/src/amd/vulkan/Makefile.am @@ -131,11 +131,11 @@ vk_format_table.c: vk_format_table.py \ $(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@ BUILT_SOURCES = $(VULKAN_GENERATED_FILES) -CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h +CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json EXTRA_DIST = \ $(top_srcdir)/include/vulkan/vk_icd.h \ dev_icd.json.in \ - radeon_icd.json \ + radeon_icd.json.in \ radv_entrypoints_gen.py \ vk_format_layout.csv \ vk_format_parse.py \ @@ -155,7 +155,7 @@ libvulkan_radeon_la_LDFLAGS = \ icdconfdir = 
@VULKAN_ICD_INSTALL_DIR@ -icdconf_DATA = radeon_icd.json +icdconf_DATA = radeon_icd.@host_cpu@.json # The following is used for development purposes, by setting VK_ICD_FILENAMES. noinst_DATA = dev_icd.json @@ -164,4 +164,9 @@ dev_icd.json : dev_icd.json.in -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ < $(srcdir)/dev_icd.json.in > $@ +radeon_icd.@host_cpu@.json : radeon_icd.json.in + $(AM_V_GEN) $(SED) \ + -e "s#@install_libdir@#${libdir}#" \ + < $(srcdir)/radeon_icd.json.in > $@ + include $(top_srcdir)/install-lib-links.mk diff --git a/src/amd/vulkan/radeon_icd.json b/src/amd/vulkan/radeon_icd.json.in index cbb4aab..a8b441d 100644 --- a/src/amd/vulkan/radeon_icd.json +++ b/src/amd/vulkan/radeon_icd.json.in @@ -1,7 +1,7 @@ { "file_format_version": "1.0.0", "ICD": { - "library_path": "libvulkan_radeon.so", + "library_path": "@install_libdir@/libvulkan_radeon.so", "api_version": "1.0.3" } } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 7410bbc..4a924ea 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -113,13 +113,19 @@ static const VkExtensionProperties global_extensions[] = { #ifdef VK_USE_PLATFORM_XCB_KHR { .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 5, + .specVersion = 6, + }, +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + { + .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, + .specVersion = 6, }, #endif #ifdef VK_USE_PLATFORM_WAYLAND_KHR { .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 4, + .specVersion = 5, }, #endif }; @@ -127,7 +133,7 @@ static const VkExtensionProperties global_extensions[] = { static const VkExtensionProperties device_extensions[] = { { .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 67, + .specVersion = 68, }, }; @@ -1166,6 +1172,8 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence) RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_fence, fence, _fence); + if (fence->signalled) + return VK_SUCCESS; if (!fence->submitted) return VK_NOT_READY; @@ -1728,26 +1736,50 @@ radv_tex_bordercolor(VkBorderColor bcolor) return 0; } +static unsigned +radv_tex_aniso_filter(unsigned filter) +{ + if (filter < 2) + return 0; + if (filter < 4) + return 1; + if (filter < 8) + return 2; + if (filter < 16) + return 3; + return 4; +} + static void radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo) { - uint32_t max_aniso = 0; - uint32_t max_aniso_ratio = 0;//TODO + uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ? + (uint32_t) pCreateInfo->maxAnisotropy : 0; + uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); bool is_vi; is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI); + if (!is_vi && max_aniso > 0) { + radv_finishme("Anisotropic filtering must be disabled manually " + "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n"); + max_aniso = max_aniso_ratio = 0; + } + sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) | S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) | S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) | S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 
1 : 0) | + S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | + S_008F30_ANISO_BIAS(max_aniso_ratio) | S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(is_vi)); sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) | - S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8))); + S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) | + S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) | S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) | diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 78efbbe..7c10b78 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader) NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); + NIR_PASS(progress, shader, nir_opt_conditional_discard); } while (progress); } @@ -642,7 +643,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, uint32_t blend_enable, uint32_t blend_need_alpha, - bool single_cb_enable) + bool single_cb_enable, + bool blend_mrt0_is_dual_src) { RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; @@ -664,6 +666,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, blend->cb_shader_mask = si_get_cb_shader_mask(col_format); + if (blend_mrt0_is_dual_src) + col_format |= (col_format & 0xf) << 4; if (!col_format) col_format |= V_028714_SPI_SHADER_32_R; blend->spi_shader_col_format = col_format; @@ -715,8 +719,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, struct radv_blend_state *blend = &pipeline->graphics.blend; unsigned mode = V_028808_CB_NORMAL; uint32_t blend_enable = 0, blend_need_alpha = 0; + bool blend_mrt0_is_dual_src = false; int i; bool single_cb_enable = false; + + if (!vkblend) + return; + if (extra && extra->custom_blend_mode) { single_cb_enable = true; mode = extra->custom_blend_mode; @@ -755,7 +764,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, } if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA)) - radv_finishme("dual source blending"); + if (i == 0) + blend_mrt0_is_dual_src = true; + if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) { srcRGB = VK_BLEND_FACTOR_ONE; dstRGB = VK_BLEND_FACTOR_ONE; @@ -797,7 +808,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE); radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, - blend_enable, blend_need_alpha, single_cb_enable); + blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src); } static uint32_t si_translate_stencil_op(enum VkStencilOp op) @@ -1069,18 +1080,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, struct radv_dynamic_state *dynamic = &pipeline->dynamic_state; - dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; - if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - typed_memcpy(dynamic->viewport.viewports, - pCreateInfo->pViewportState->pViewports, - pCreateInfo->pViewportState->viewportCount); - } + /* Section 
9.2 of the Vulkan 1.0.15 spec says: + * + * pViewportState is [...] NULL if the pipeline + * has rasterization disabled. + */ + if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) { + assert(pCreateInfo->pViewportState); + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } - dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; - if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - typed_memcpy(dynamic->scissor.scissors, - pCreateInfo->pViewportState->pScissors, - pCreateInfo->pViewportState->scissorCount); + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } } if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { @@ -1098,7 +1118,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, pCreateInfo->pRasterizationState->depthBiasSlopeFactor; } - if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + /* Section 9.2 of the Vulkan 1.0.15 spec says: + * + * pColorBlendState is [...] NULL if the pipeline has rasterization + * disabled or if the subpass of the render pass the pipeline is + * created against does not use any color attachments. + */ + bool uses_color_att = false; + for (unsigned i = 0; i < subpass->color_count; ++i) { + if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) { + uses_color_att = true; + break; + } + } + + if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { assert(pCreateInfo->pColorBlendState); typed_memcpy(dynamic->blend_constants, pCreateInfo->pColorBlendState->blendConstants, 4); @@ -1110,14 +1144,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, * no need to override the depthstencil defaults in * radv_pipeline::dynamic_state when there is no depthstencil attachment. * - * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * Section 9.2 of the Vulkan 1.0.15 spec says: * - * pDepthStencilState [...] may only be NULL if renderPass and subpass - * specify a subpass that has no depth/stencil attachment. + * pDepthStencilState is [...] NULL if the pipeline has rasterization + * disabled or if the subpass of the render pass the pipeline is created + * against does not use a depth/stencil attachment. 
*/ - if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && + subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + assert(pCreateInfo->pDepthStencilState); + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { - assert(pCreateInfo->pDepthStencilState); dynamic->depth_bounds.min = pCreateInfo->pDepthStencilState->minDepthBounds; dynamic->depth_bounds.max = @@ -1125,7 +1162,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - assert(pCreateInfo->pDepthStencilState); dynamic->stencil_compare_mask.front = pCreateInfo->pDepthStencilState->front.compareMask; dynamic->stencil_compare_mask.back = @@ -1133,7 +1169,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - assert(pCreateInfo->pDepthStencilState); dynamic->stencil_write_mask.front = pCreateInfo->pDepthStencilState->front.writeMask; dynamic->stencil_write_mask.back = @@ -1141,7 +1176,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - assert(pCreateInfo->pDepthStencilState); dynamic->stencil_reference.front = pCreateInfo->pDepthStencilState->front.reference; dynamic->stencil_reference.back = diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index a30443d..2bb4868 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -227,6 +227,7 @@ NIR_FILES = \ nir/nir_metadata.c \ nir/nir_move_vec_src_uses_to_dest.c \ nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_conditional_discard.c \ nir/nir_opt_constant_folding.c \ nir/nir_opt_copy_propagate.c \ nir/nir_opt_cse.c \ diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index f5c177e..f62a848 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -181,7 +181,43 @@ private: }; -class array_resize_visitor : public ir_hierarchical_visitor { +/** + * A visitor helper that provides methods for updating the types of + * ir_dereferences. Classes that update variable types (say, updating + * array sizes) will want to use this so that dereference types stay in sync. + */ +class deref_type_updater : public ir_hierarchical_visitor { +public: + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir->type = ir->var->type; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_dereference_array *ir) + { + const glsl_type *const vt = ir->array->type; + if (vt->is_array()) + ir->type = vt->fields.array; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_dereference_record *ir) + { + for (unsigned i = 0; i < ir->record->type->length; i++) { + const struct glsl_struct_field *field = + &ir->record->type->fields.structure[i]; + if (strcmp(field->name, ir->field) == 0) { + ir->type = field->type; + break; + } + } + return visit_continue; + } +}; + + +class array_resize_visitor : public deref_type_updater { public: unsigned num_vertices; gl_shader_program *prog; @@ -240,24 +276,6 @@ public: return visit_continue; } - - /* Dereferences of input variables need to be updated so that their type - * matches the newly assigned type of the variable they are accessing. 
*/ - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - ir->type = ir->var->type; - return visit_continue; - } - - /* Dereferences of 2D input arrays need to be updated so that their type - * matches the newly assigned type of the array they are accessing. */ - virtual ir_visitor_status visit_leave(ir_dereference_array *ir) - { - const glsl_type *const vt = ir->array->type; - if (vt->is_array()) - ir->type = vt->fields.array; - return visit_continue; - } }; /** @@ -1353,7 +1371,7 @@ move_non_declarations(exec_list *instructions, exec_node *last, * it inside that function leads to compiler warnings with some versions of * gcc. */ -class array_sizing_visitor : public ir_hierarchical_visitor { +class array_sizing_visitor : public deref_type_updater { public: array_sizing_visitor() : mem_ctx(ralloc_context(NULL)), @@ -2273,6 +2291,8 @@ update_array_sizes(struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; + bool types_were_updated = false; + foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { ir_variable *const var = node->as_variable(); @@ -2328,11 +2348,15 @@ update_array_sizes(struct gl_shader_program *prog) var->type = glsl_type::get_array_instance(var->type->fields.array, size + 1); - /* FINISHME: We should update the types of array - * dereferences of this variable now. - */ + types_were_updated = true; } } + + /* Update the types of dereferences in case we changed any. */ + if (types_were_updated) { + deref_type_updater v; + v.run(prog->_LinkedShaders[i]->ir); + } } } @@ -4785,14 +4809,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) "type of shader\n"); } - for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] != NULL) { - _mesa_delete_linked_shader(ctx, prog->_LinkedShaders[i]); - } - - prog->_LinkedShaders[i] = NULL; - } - /* Link all shaders for a particular stage and validate the result. */ for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp index 37134a9..eafa1dd 100644 --- a/src/compiler/glsl/lower_ubo_reference.cpp +++ b/src/compiler/glsl/lower_ubo_reference.cpp @@ -107,7 +107,6 @@ public: struct gl_linked_shader *shader; bool clamp_block_indices; - struct gl_uniform_buffer_variable *ubo_var; const struct glsl_struct_field *struct_field; ir_variable *variable; ir_rvalue *uniform_block; @@ -308,8 +307,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, this->uniform_block = index; } - this->ubo_var = var->is_interface_instance() - ? 
&blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location]; + if (var->is_interface_instance()) { + *const_offset = 0; + } else { + *const_offset = blocks[i]->Uniforms[var->data.location].Offset; + } break; } @@ -317,8 +319,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, assert(this->uniform_block); - *const_offset = ubo_var->Offset; - this->struct_field = NULL; setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major, matrix_columns, &this->struct_field, packing); diff --git a/src/compiler/glsl/standalone.cpp b/src/compiler/glsl/standalone.cpp index 055c433..f096490 100644 --- a/src/compiler/glsl/standalone.cpp +++ b/src/compiler/glsl/standalone.cpp @@ -421,7 +421,7 @@ standalone_compile_shader(const struct standalone_options *_options, } if ((status == EXIT_SUCCESS) && options->do_link) { - _mesa_clear_shader_program_data(whole_program); + _mesa_clear_shader_program_data(ctx, whole_program); link_shaders(ctx, whole_program); status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE; diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index 35e40d6..d229368 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -123,8 +123,16 @@ _mesa_delete_linked_shader(struct gl_context *ctx, } void -_mesa_clear_shader_program_data(struct gl_shader_program *shProg) +_mesa_clear_shader_program_data(struct gl_context *ctx, + struct gl_shader_program *shProg) { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (shProg->_LinkedShaders[i] != NULL) { + _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[i]); + shProg->_LinkedShaders[i] = NULL; + } + } + shProg->NumUniformStorage = 0; shProg->UniformStorage = NULL; shProg->NumUniformRemapTable = 0; diff --git a/src/compiler/glsl/standalone_scaffolding.h b/src/compiler/glsl/standalone_scaffolding.h index b56dd3e..2c04548 100644 --- a/src/compiler/glsl/standalone_scaffolding.h +++ b/src/compiler/glsl/standalone_scaffolding.h @@ -56,7 +56,8 @@ _mesa_delete_linked_shader(struct gl_context *ctx, struct gl_linked_shader *sh); extern "C" void -_mesa_clear_shader_program_data(struct gl_shader_program *); +_mesa_clear_shader_program_data(struct gl_context *ctx, + struct gl_shader_program *); extern "C" void _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d6c8efa..0369700 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2625,6 +2625,8 @@ bool nir_opt_remove_phis(nir_shader *shader); bool nir_opt_undef(nir_shader *shader); +bool nir_opt_conditional_discard(nir_shader *shader); + void nir_sweep(nir_shader *shader); nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index 173f058..f211c73 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -273,6 +273,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state, } static void +lower_load_sample_pos(lower_wpos_ytransform_state *state, + nir_intrinsic_instr *intr) +{ + nir_builder *b = &state->b; + b->cursor = nir_after_instr(&intr->instr); + + nir_ssa_def *pos = &intr->dest.ssa; + nir_ssa_def *scale = nir_channel(b, get_transform(state), 0); + nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2); + /* Either y or 1-y for scale equal to 1 or -1 
respectively. */ + nir_ssa_def *flipped_y = + nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)), + nir_fmul(b, nir_channel(b, pos, 1), scale)); + nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y); + + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos), + flipped_pos->parent_instr); +} + +static void lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block) { nir_foreach_instr_safe(instr, block) { @@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block /* gl_FragCoord should not have array/struct deref's: */ assert(dvar->deref.child == NULL); lower_fragcoord(state, intr); + } else if (var->data.mode == nir_var_system_value && + var->data.location == SYSTEM_VALUE_SAMPLE_POS) { + assert(dvar->deref.child == NULL); + lower_load_sample_pos(state, intr); } } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) { lower_interp_var_at_offset(state, intr); diff --git a/src/compiler/nir/nir_opt_conditional_discard.c b/src/compiler/nir/nir_opt_conditional_discard.c new file mode 100644 index 0000000..2fde179 --- /dev/null +++ b/src/compiler/nir/nir_opt_conditional_discard.c @@ -0,0 +1,125 @@ +/* + * Copyright © 2016 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** @file nir_opt_conditional_discard.c + * + * Handles optimization of lowering if (cond) discard to discard_if(cond). 
+ */ + +static bool +nir_opt_conditional_discard_block(nir_block *block, void *mem_ctx) +{ + nir_builder bld; + + if (nir_cf_node_is_first(&block->cf_node)) + return false; + + nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); + if (prev_node->type != nir_cf_node_if) + return false; + + nir_if *if_stmt = nir_cf_node_as_if(prev_node); + nir_block *then_block = nir_if_first_then_block(if_stmt); + nir_block *else_block = nir_if_first_else_block(if_stmt); + + /* check there is only one else block and it is empty */ + if (nir_if_last_else_block(if_stmt) != else_block) + return false; + if (!exec_list_is_empty(&else_block->instr_list)) + return false; + + /* check there is only one then block and it has only one instruction in it */ + if (nir_if_last_then_block(if_stmt) != then_block) + return false; + if (exec_list_is_empty(&then_block->instr_list)) + return false; + if (exec_list_length(&then_block->instr_list) > 1) + return false; + /* + * make sure no subsequent phi nodes point at this if. + */ + nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node)); + nir_foreach_instr_safe(instr, after) { + if (instr->type != nir_instr_type_phi) + break; + nir_phi_instr *phi = nir_instr_as_phi(instr); + + nir_foreach_phi_src(phi_src, phi) { + if (phi_src->pred == then_block || + phi_src->pred == else_block) + return false; + } + } + + /* Get the first instruction in the then block and confirm it is + * a discard or a discard_if + */ + nir_instr *instr = nir_block_first_instr(then_block); + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_discard && + intrin->intrinsic != nir_intrinsic_discard_if) + return false; + + nir_src cond; + + nir_builder_init(&bld, mem_ctx); + bld.cursor = nir_before_cf_node(prev_node); + if (intrin->intrinsic == nir_intrinsic_discard) + cond = if_stmt->condition; + else + cond = nir_src_for_ssa(nir_iand(&bld, + nir_ssa_for_src(&bld, if_stmt->condition, 1), + nir_ssa_for_src(&bld, intrin->src[0], 1))); + + nir_intrinsic_instr *discard_if = + nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_discard_if); + nir_src_copy(&discard_if->src[0], &cond, discard_if); + + nir_instr_insert_before_cf(prev_node, &discard_if->instr); + nir_instr_remove(&intrin->instr); + nir_cf_node_remove(&if_stmt->cf_node); + + return true; +} + +bool +nir_opt_conditional_discard(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) { + void *mem_ctx = ralloc_parent(function->impl); + nir_foreach_block_safe(block, function->impl) { + progress |= nir_opt_conditional_discard_block(block, mem_ctx); + } + } + } + return progress; +} diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 85d4b85..0020a5b 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -80,8 +80,6 @@ #include "eglimage.h" #include "eglsync.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) - struct wl_buffer; struct dri2_egl_driver diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 9db9964..697b6fe 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -2384,7 +2384,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context, return MESA_GLINTEROP_SUCCESS; } -int +PUBLIC int MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_device_info *out) { @@ -2406,7 +2406,7 @@ 
MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context, return ret; } -int +PUBLIC int MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out) diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h index 13a7563..d0502f3 100644 --- a/src/egl/main/egldefines.h +++ b/src/egl/main/egldefines.h @@ -34,6 +34,8 @@ #ifndef EGLDEFINES_INCLUDED #define EGLDEFINES_INCLUDED +#include "util/macros.h" + #ifdef __cplusplus extern "C" { #endif @@ -48,9 +50,6 @@ extern "C" { #define _EGL_VENDOR_STRING "Mesa Project" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) -#define MIN2(A, B) (((A) < (B)) ? (A) : (B)) - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/hud/hud_cpufreq.c b/src/gallium/auxiliary/hud/hud_cpufreq.c index 4501bbb..19a6f08 100644 --- a/src/gallium/auxiliary/hud/hud_cpufreq.c +++ b/src/gallium/auxiliary/hud/hud_cpufreq.c @@ -36,6 +36,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "os/os_time.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> @@ -61,6 +62,7 @@ struct cpufreq_info static int gcpufreq_count = 0; static struct list_head gcpufreq_list; +pipe_static_mutex(gcpufreq_mutex); static struct cpufreq_info * find_cfi_by_index(int cpu_index, int mode) @@ -112,14 +114,6 @@ query_cfi_load(struct hud_graph *gr) } } -static void -free_query_data(void *p) -{ - struct cpufreq_info *cfi = (struct cpufreq_info *)p; - list_del(&cfi->list); - FREE(cfi); -} - /** * Create and initialize a new object for a specific CPU. * \param pane parent context. @@ -162,11 +156,6 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index, gr->query_data = cfi; gr->query_new_value = query_cfi_load; - /* Don't use free() as our callback as that messes up Gallium's - * memory debugger. Use simple free_query_data() wrapper. - */ - gr->free_query_data = free_query_data; - hud_pane_add_graph(pane, gr); hud_pane_set_max_value(pane, 3000000 /* 3 GHz */); } @@ -199,16 +188,21 @@ hud_get_num_cpufreq(bool displayhelp) int cpu_index; /* Return the number of CPU metrics we support. */ - if (gcpufreq_count) + pipe_mutex_lock(gcpufreq_mutex); + if (gcpufreq_count) { + pipe_mutex_unlock(gcpufreq_mutex); return gcpufreq_count; + } /* Scan /sys/devices.../cpu, for every object type we support, create * and persist an object to represent its different metrics. 
*/ list_inithead(&gcpufreq_list); DIR *dir = opendir("/sys/devices/system/cpu"); - if (!dir) + if (!dir) { + pipe_mutex_unlock(gcpufreq_mutex); return 0; + } while ((dp = readdir(dir)) != NULL) { @@ -238,6 +232,7 @@ hud_get_num_cpufreq(bool displayhelp) snprintf(fn, sizeof(fn), "%s/cpufreq/scaling_max_freq", basename); add_object(dp->d_name, fn, CPUFREQ_MAXIMUM, cpu_index); } + closedir(dir); if (displayhelp) { list_for_each_entry(struct cpufreq_info, cfi, &gcpufreq_list, list) { @@ -251,6 +246,7 @@ hud_get_num_cpufreq(bool displayhelp) } } + pipe_mutex_unlock(gcpufreq_mutex); return gcpufreq_count; } diff --git a/src/gallium/auxiliary/hud/hud_diskstat.c b/src/gallium/auxiliary/hud/hud_diskstat.c index b248baf..af6e62d 100644 --- a/src/gallium/auxiliary/hud/hud_diskstat.c +++ b/src/gallium/auxiliary/hud/hud_diskstat.c @@ -35,6 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "os/os_time.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> @@ -81,6 +82,7 @@ struct diskstat_info */ static int gdiskstat_count = 0; static struct list_head gdiskstat_list; +pipe_static_mutex(gdiskstat_mutex); static struct diskstat_info * find_dsi_by_name(const char *n, int mode) @@ -162,14 +164,6 @@ query_dsi_load(struct hud_graph *gr) } } -static void -free_query_data(void *p) -{ - struct diskstat_info *nic = (struct diskstat_info *) p; - list_del(&nic->list); - FREE(nic); -} - /** * Create and initialize a new object for a specific block I/O device. * \param pane parent context. @@ -208,11 +202,6 @@ hud_diskstat_graph_install(struct hud_pane *pane, const char *dev_name, gr->query_data = dsi; gr->query_new_value = query_dsi_load; - /* Don't use free() as our callback as that messes up Gallium's - * memory debugger. Use simple free_query_data() wrapper. - */ - gr->free_query_data = free_query_data; - hud_pane_add_graph(pane, gr); hud_pane_set_max_value(pane, 100); } @@ -257,16 +246,21 @@ hud_get_num_disks(bool displayhelp) char name[64]; /* Return the number of block devices and partitions. */ - if (gdiskstat_count) + pipe_mutex_lock(gdiskstat_mutex); + if (gdiskstat_count) { + pipe_mutex_unlock(gdiskstat_mutex); return gdiskstat_count; + } /* Scan /sys/block, for every object type we support, create and * persist an object to represent its different statistics. */ list_inithead(&gdiskstat_list); DIR *dir = opendir("/sys/block/"); - if (!dir) + if (!dir) { + pipe_mutex_unlock(gdiskstat_mutex); return 0; + } while ((dp = readdir(dir)) != NULL) { @@ -290,8 +284,11 @@ hud_get_num_disks(bool displayhelp) /* Add any partitions */ struct dirent *dpart; DIR *pdir = opendir(basename); - if (!pdir) + if (!pdir) { + pipe_mutex_unlock(gdiskstat_mutex); + closedir(dir); return 0; + } while ((dpart = readdir(pdir)) != NULL) { /* Avoid 'lo' and '..' and '.' 
*/ @@ -311,6 +308,7 @@ hud_get_num_disks(bool displayhelp) add_object_part(basename, dpart->d_name, DISKSTAT_WR); } } + closedir(dir); if (displayhelp) { list_for_each_entry(struct diskstat_info, dsi, &gdiskstat_list, list) { @@ -322,6 +320,7 @@ hud_get_num_disks(bool displayhelp) puts(line); } } + pipe_mutex_unlock(gdiskstat_mutex); return gdiskstat_count; } diff --git a/src/gallium/auxiliary/hud/hud_nic.c b/src/gallium/auxiliary/hud/hud_nic.c index fb6b8c0..f9935de 100644 --- a/src/gallium/auxiliary/hud/hud_nic.c +++ b/src/gallium/auxiliary/hud/hud_nic.c @@ -35,6 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "os/os_time.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> @@ -66,6 +67,7 @@ struct nic_info */ static int gnic_count = 0; static struct list_head gnic_list; +pipe_static_mutex(gnic_mutex); static struct nic_info * find_nic_by_name(const char *n, int mode) @@ -234,14 +236,6 @@ query_nic_load(struct hud_graph *gr) } } -static void -free_query_data(void *p) -{ - struct nic_info *nic = (struct nic_info *) p; - list_del(&nic->list); - FREE(nic); -} - /** * Create and initialize a new object for a specific network interface dev. * \param pane parent context. @@ -284,11 +278,6 @@ hud_nic_graph_install(struct hud_pane *pane, const char *nic_name, gr->query_data = nic; gr->query_new_value = query_nic_load; - /* Don't use free() as our callback as that messes up Gallium's - * memory debugger. Use simple free_query_data() wrapper. - */ - gr->free_query_data = free_query_data; - hud_pane_add_graph(pane, gr); hud_pane_set_max_value(pane, 100); } @@ -342,16 +331,21 @@ hud_get_num_nics(bool displayhelp) char name[64]; /* Return the number if network interfaces. */ - if (gnic_count) + pipe_mutex_lock(gnic_mutex); + if (gnic_count) { + pipe_mutex_unlock(gnic_mutex); return gnic_count; + } /* Scan /sys/block, for every object type we support, create and * persist an object to represent its different statistics. */ list_inithead(&gnic_list); DIR *dir = opendir("/sys/class/net/"); - if (!dir) + if (!dir) { + pipe_mutex_unlock(gnic_mutex); return 0; + } while ((dp = readdir(dir)) != NULL) { @@ -412,6 +406,7 @@ hud_get_num_nics(bool displayhelp) } } + closedir(dir); list_for_each_entry(struct nic_info, nic, &gnic_list, list) { char line[64]; @@ -424,6 +419,7 @@ hud_get_num_nics(bool displayhelp) } + pipe_mutex_unlock(gnic_mutex); return gnic_count; } diff --git a/src/gallium/auxiliary/hud/hud_sensors_temp.c b/src/gallium/auxiliary/hud/hud_sensors_temp.c index e41b847..11b8a4c 100644 --- a/src/gallium/auxiliary/hud/hud_sensors_temp.c +++ b/src/gallium/auxiliary/hud/hud_sensors_temp.c @@ -32,6 +32,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "os/os_time.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> @@ -49,6 +50,7 @@ */ static int gsensors_temp_count = 0; static struct list_head gsensors_temp_list; +pipe_static_mutex(gsensor_temp_mutex); struct sensors_temp_info { @@ -189,17 +191,6 @@ query_sti_load(struct hud_graph *gr) } } -static void -free_query_data(void *p) -{ - struct sensors_temp_info *sti = (struct sensors_temp_info *) p; - list_del(&sti->list); - if (sti->chip) - sensors_free_chip_name(sti->chip); - FREE(sti); - sensors_cleanup(); -} - /** * Create and initialize a new object for a specific sensor interface dev. * \param pane parent context. 
@@ -237,11 +228,6 @@ hud_sensors_temp_graph_install(struct hud_pane *pane, const char *dev_name, gr->query_data = sti; gr->query_new_value = query_sti_load; - /* Don't use free() as our callback as that messes up Gallium's - * memory debugger. Use simple free_query_data() wrapper. - */ - gr->free_query_data = free_query_data; - hud_pane_add_graph(pane, gr); switch (sti->mode) { case SENSORS_TEMP_CURRENT: @@ -338,12 +324,17 @@ int hud_get_num_sensors(bool displayhelp) { /* Return the number of sensors detected. */ - if (gsensors_temp_count) + pipe_mutex_lock(gsensor_temp_mutex); + if (gsensors_temp_count) { + pipe_mutex_unlock(gsensor_temp_mutex); return gsensors_temp_count; + } int ret = sensors_init(NULL); - if (ret) + if (ret) { + pipe_mutex_unlock(gsensor_temp_mutex); return 0; + } list_inithead(&gsensors_temp_list); @@ -377,6 +368,7 @@ hud_get_num_sensors(bool displayhelp) } } + pipe_mutex_unlock(gsensor_temp_mutex); return gsensors_temp_count; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index db41f56..dd8f83b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -487,7 +487,9 @@ si_decompress_sampler_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size || tex->dcc_offset); + /* CMASK or DCC can be discarded and we can still end up here. */ + if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset) + continue; si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 123ff5d..18e905b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef bfi_args[3]; + LLVMValueRef bfi_sm5; + LLVMValueRef cond; // Calculate the bitmask: (((1 << src3) - 1) << src2 bfi_args[0] = LLVMBuildShl(builder, @@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action, * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) * Use the right-hand side, which the LLVM backend can convert to V_BFI. */ - emit_data->output[emit_data->chan] = + bfi_sm5 = LLVMBuildXor(builder, bfi_args[2], LLVMBuildAnd(builder, bfi_args[0], LLVMBuildXor(builder, bfi_args[1], bfi_args[2], ""), ""), ""); + + /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend + * uses the convenient V_BFI lowering for the above, which follows SM5 + * and disagrees with GLSL semantics when bits (src3) is 32. + */ + cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3], + lp_build_const_int32(gallivm, 32), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, ""); +} + +static void emit_bfe(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bfe_sm5; + LLVMValueRef cond; + + bfe_sm5 = lp_build_intrinsic(builder, action->intr_name, + emit_data->dst_type, emit_data->args, + emit_data->arg_count, LLVMReadNoneAttribute); + + /* Correct for GLSL semantics. 
*/ + cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2], + lp_build_const_int32(gallivm, 32), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, ""); } /* this is ffs in C */ @@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs; - bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe; bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32"; bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int; @@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32"; bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; - bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe; bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32"; bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 81ac070..0145488 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1370,7 +1370,7 @@ emit_vert_end(struct vc4_compile *c, struct vc4_varying_slot *fs_inputs, uint32_t num_fs_inputs) { - struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]); + struct qreg rcp_w = ntq_rcp(c, c->outputs[c->output_position_index + 3]); emit_stub_vpm_read(c); diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c index f4d62a3..8c29a3f 100644 --- a/src/gallium/state_trackers/vdpau/output.c +++ b/src/gallium/state_trackers/vdpau/output.c @@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device, res_tmpl.depth0 = 1; res_tmpl.array_size = 1; res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET | - PIPE_BIND_LINEAR | PIPE_BIND_SHARED; + PIPE_BIND_SHARED; res_tmpl.usage = PIPE_USAGE_DEFAULT; pipe_mutex_lock(dev->mutex); diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c index e6b9c0b..b5e3398 100644 --- a/src/glx/g_glxglvnddispatchfuncs.c +++ b/src/glx/g_glxglvnddispatchfuncs.c @@ -17,16 +17,19 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = { #define __ATTRIB(field) \ [DI_##field] = "glX"#field + __ATTRIB(BindSwapBarrierSGIX), __ATTRIB(BindTexImageEXT), // glXChooseFBConfig implemented by libglvnd __ATTRIB(ChooseFBConfigSGIX), // glXChooseVisual implemented by libglvnd // glXCopyContext implemented by libglvnd + __ATTRIB(CopySubBufferMESA), // glXCreateContext implemented by libglvnd __ATTRIB(CreateContextAttribsARB), __ATTRIB(CreateContextWithConfigSGIX), __ATTRIB(CreateGLXPbufferSGIX), // glXCreateGLXPixmap implemented by libglvnd + __ATTRIB(CreateGLXPixmapMESA), __ATTRIB(CreateGLXPixmapWithConfigSGIX), // glXCreateNewContext implemented by libglvnd // glXCreatePbuffer implemented by libglvnd @@ -51,54 +54,50 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = { __ATTRIB(GetFBConfigAttribSGIX), 
__ATTRIB(GetFBConfigFromVisualSGIX), // glXGetFBConfigs implemented by libglvnd + __ATTRIB(GetMscRateOML), // glXGetProcAddress implemented by libglvnd // glXGetProcAddressARB implemented by libglvnd + __ATTRIB(GetScreenDriver), // glXGetSelectedEvent implemented by libglvnd __ATTRIB(GetSelectedEventSGIX), + __ATTRIB(GetSwapIntervalMESA), + __ATTRIB(GetSyncValuesOML), __ATTRIB(GetVideoSyncSGI), // glXGetVisualFromFBConfig implemented by libglvnd __ATTRIB(GetVisualFromFBConfigSGIX), // glXImportContextEXT implemented by libglvnd // glXIsDirect implemented by libglvnd + __ATTRIB(JoinSwapGroupSGIX), // glXMakeContextCurrent implemented by libglvnd // glXMakeCurrent implemented by libglvnd // glXQueryContext implemented by libglvnd __ATTRIB(QueryContextInfoEXT), + __ATTRIB(QueryCurrentRendererIntegerMESA), + __ATTRIB(QueryCurrentRendererStringMESA), // glXQueryDrawable implemented by libglvnd // glXQueryExtension implemented by libglvnd // glXQueryExtensionsString implemented by libglvnd __ATTRIB(QueryGLXPbufferSGIX), + __ATTRIB(QueryMaxSwapBarriersSGIX), + __ATTRIB(QueryRendererIntegerMESA), + __ATTRIB(QueryRendererStringMESA), // glXQueryServerString implemented by libglvnd // glXQueryVersion implemented by libglvnd + __ATTRIB(ReleaseBuffersMESA), __ATTRIB(ReleaseTexImageEXT), // glXSelectEvent implemented by libglvnd __ATTRIB(SelectEventSGIX), // glXSwapBuffers implemented by libglvnd + __ATTRIB(SwapBuffersMscOML), + __ATTRIB(SwapIntervalMESA), __ATTRIB(SwapIntervalSGI), // glXUseXFont implemented by libglvnd + __ATTRIB(WaitForMscOML), + __ATTRIB(WaitForSbcOML), // glXWaitGL implemented by libglvnd __ATTRIB(WaitVideoSyncSGI), // glXWaitX implemented by libglvnd - __ATTRIB(glXBindSwapBarrierSGIX), - __ATTRIB(glXCopySubBufferMESA), - __ATTRIB(glXCreateGLXPixmapMESA), - __ATTRIB(glXGetMscRateOML), - __ATTRIB(glXGetScreenDriver), - __ATTRIB(glXGetSwapIntervalMESA), - __ATTRIB(glXGetSyncValuesOML), - __ATTRIB(glXJoinSwapGroupSGIX), - __ATTRIB(glXQueryCurrentRendererIntegerMESA), - __ATTRIB(glXQueryCurrentRendererStringMESA), - __ATTRIB(glXQueryMaxSwapBarriersSGIX), - __ATTRIB(glXQueryRendererIntegerMESA), - __ATTRIB(glXQueryRendererStringMESA), - __ATTRIB(glXReleaseBuffersMESA), - __ATTRIB(glXSwapBuffersMscOML), - __ATTRIB(glXSwapIntervalMESA), - __ATTRIB(glXWaitForMscOML), - __ATTRIB(glXWaitForSbcOML), - #undef __ATTRIB }; @@ -557,49 +556,49 @@ static int dispatch_WaitVideoSyncSGI(int divisor, int remainder, -static void dispatch_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, +static void dispatch_BindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) { - PFNGLXBINDSWAPBARRIERSGIXPROC pglXBindSwapBarrierSGIX; + PFNGLXBINDSWAPBARRIERSGIXPROC pBindSwapBarrierSGIX; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return; - __FETCH_FUNCTION_PTR(glXBindSwapBarrierSGIX); - if (pglXBindSwapBarrierSGIX == NULL) + __FETCH_FUNCTION_PTR(BindSwapBarrierSGIX); + if (pBindSwapBarrierSGIX == NULL) return; - (*pglXBindSwapBarrierSGIX)(dpy, drawable, barrier); + (*pBindSwapBarrierSGIX)(dpy, drawable, barrier); } -static void dispatch_glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable, +static void dispatch_CopySubBufferMESA(Display *dpy, GLXDrawable drawable, int x, int y, int width, int height) { - PFNGLXCOPYSUBBUFFERMESAPROC pglXCopySubBufferMESA; + PFNGLXCOPYSUBBUFFERMESAPROC pCopySubBufferMESA; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return; - __FETCH_FUNCTION_PTR(glXCopySubBufferMESA); - if 
(pglXCopySubBufferMESA == NULL) + __FETCH_FUNCTION_PTR(CopySubBufferMESA); + if (pCopySubBufferMESA == NULL) return; - (*pglXCopySubBufferMESA)(dpy, drawable, x, y, width, height); + (*pCopySubBufferMESA)(dpy, drawable, x, y, width, height); } -static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy, +static GLXPixmap dispatch_CreateGLXPixmapMESA(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap) { - PFNGLXCREATEGLXPIXMAPMESAPROC pglXCreateGLXPixmapMESA; + PFNGLXCREATEGLXPIXMAPMESAPROC pCreateGLXPixmapMESA; __GLXvendorInfo *dd; GLXPixmap ret; @@ -607,11 +606,11 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy, if (dd == NULL) return None; - __FETCH_FUNCTION_PTR(glXCreateGLXPixmapMESA); - if (pglXCreateGLXPixmapMESA == NULL) + __FETCH_FUNCTION_PTR(CreateGLXPixmapMESA); + if (pCreateGLXPixmapMESA == NULL) return None; - ret = (*pglXCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap); + ret = (*pCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap); if (AddDrawableMapping(dpy, ret, dd)) { /* XXX: Call glXDestroyGLXPixmap which lives in libglvnd. If we're not * allowed to call it from here, should we extend __glXDispatchTableIndices ? @@ -624,47 +623,47 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy, -static GLboolean dispatch_glXGetMscRateOML(Display *dpy, GLXDrawable drawable, +static GLboolean dispatch_GetMscRateOML(Display *dpy, GLXDrawable drawable, int32_t *numerator, int32_t *denominator) { - PFNGLXGETMSCRATEOMLPROC pglXGetMscRateOML; + PFNGLXGETMSCRATEOMLPROC pGetMscRateOML; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return GL_FALSE; - __FETCH_FUNCTION_PTR(glXGetMscRateOML); - if (pglXGetMscRateOML == NULL) + __FETCH_FUNCTION_PTR(GetMscRateOML); + if (pGetMscRateOML == NULL) return GL_FALSE; - return (*pglXGetMscRateOML)(dpy, drawable, numerator, denominator); + return (*pGetMscRateOML)(dpy, drawable, numerator, denominator); } -static const char *dispatch_glXGetScreenDriver(Display *dpy, int scrNum) +static const char *dispatch_GetScreenDriver(Display *dpy, int scrNum) { typedef const char *(*fn_glXGetScreenDriver_ptr)(Display *dpy, int scrNum); - fn_glXGetScreenDriver_ptr pglXGetScreenDriver; + fn_glXGetScreenDriver_ptr pGetScreenDriver; __GLXvendorInfo *dd; dd = __VND->getDynDispatch(dpy, scrNum); if (dd == NULL) return NULL; - __FETCH_FUNCTION_PTR(glXGetScreenDriver); - if (pglXGetScreenDriver == NULL) + __FETCH_FUNCTION_PTR(GetScreenDriver); + if (pGetScreenDriver == NULL) return NULL; - return (*pglXGetScreenDriver)(dpy, scrNum); + return (*pGetScreenDriver)(dpy, scrNum); } -static int dispatch_glXGetSwapIntervalMESA(void) +static int dispatch_GetSwapIntervalMESA(void) { - PFNGLXGETSWAPINTERVALMESAPROC pglXGetSwapIntervalMESA; + PFNGLXGETSWAPINTERVALMESAPROC pGetSwapIntervalMESA; __GLXvendorInfo *dd; if (!__VND->getCurrentContext()) @@ -674,57 +673,57 @@ static int dispatch_glXGetSwapIntervalMESA(void) if (dd == NULL) return 0; - __FETCH_FUNCTION_PTR(glXGetSwapIntervalMESA); - if (pglXGetSwapIntervalMESA == NULL) + __FETCH_FUNCTION_PTR(GetSwapIntervalMESA); + if (pGetSwapIntervalMESA == NULL) return 0; - return (*pglXGetSwapIntervalMESA)(); + return (*pGetSwapIntervalMESA)(); } -static Bool dispatch_glXGetSyncValuesOML(Display *dpy, GLXDrawable drawable, +static Bool dispatch_GetSyncValuesOML(Display *dpy, GLXDrawable drawable, int64_t *ust, int64_t *msc, int64_t *sbc) { - PFNGLXGETSYNCVALUESOMLPROC pglXGetSyncValuesOML; + PFNGLXGETSYNCVALUESOMLPROC pGetSyncValuesOML; __GLXvendorInfo *dd; dd = 
GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXGetSyncValuesOML); - if (pglXGetSyncValuesOML == NULL) + __FETCH_FUNCTION_PTR(GetSyncValuesOML); + if (pGetSyncValuesOML == NULL) return False; - return (*pglXGetSyncValuesOML)(dpy, drawable, ust, msc, sbc); + return (*pGetSyncValuesOML)(dpy, drawable, ust, msc, sbc); } -static void dispatch_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, +static void dispatch_JoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) { - PFNGLXJOINSWAPGROUPSGIXPROC pglXJoinSwapGroupSGIX; + PFNGLXJOINSWAPGROUPSGIXPROC pJoinSwapGroupSGIX; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return; - __FETCH_FUNCTION_PTR(glXJoinSwapGroupSGIX); - if (pglXJoinSwapGroupSGIX == NULL) + __FETCH_FUNCTION_PTR(JoinSwapGroupSGIX); + if (pJoinSwapGroupSGIX == NULL) return; - (*pglXJoinSwapGroupSGIX)(dpy, drawable, member); + (*pJoinSwapGroupSGIX)(dpy, drawable, member); } -static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute, +static Bool dispatch_QueryCurrentRendererIntegerMESA(int attribute, unsigned int *value) { - PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pglXQueryCurrentRendererIntegerMESA; + PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pQueryCurrentRendererIntegerMESA; __GLXvendorInfo *dd; if (!__VND->getCurrentContext()) @@ -734,18 +733,18 @@ static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute, if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXQueryCurrentRendererIntegerMESA); - if (pglXQueryCurrentRendererIntegerMESA == NULL) + __FETCH_FUNCTION_PTR(QueryCurrentRendererIntegerMESA); + if (pQueryCurrentRendererIntegerMESA == NULL) return False; - return (*pglXQueryCurrentRendererIntegerMESA)(attribute, value); + return (*pQueryCurrentRendererIntegerMESA)(attribute, value); } -static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute) +static const char *dispatch_QueryCurrentRendererStringMESA(int attribute) { - PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pglXQueryCurrentRendererStringMESA; + PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pQueryCurrentRendererStringMESA; __GLXvendorInfo *dd; if (!__VND->getCurrentContext()) @@ -755,114 +754,114 @@ static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute) if (dd == NULL) return NULL; - __FETCH_FUNCTION_PTR(glXQueryCurrentRendererStringMESA); - if (pglXQueryCurrentRendererStringMESA == NULL) + __FETCH_FUNCTION_PTR(QueryCurrentRendererStringMESA); + if (pQueryCurrentRendererStringMESA == NULL) return NULL; - return (*pglXQueryCurrentRendererStringMESA)(attribute); + return (*pQueryCurrentRendererStringMESA)(attribute); } -static Bool dispatch_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, +static Bool dispatch_QueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) { - PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pglXQueryMaxSwapBarriersSGIX; + PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pQueryMaxSwapBarriersSGIX; __GLXvendorInfo *dd; dd = __VND->getDynDispatch(dpy, screen); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXQueryMaxSwapBarriersSGIX); - if (pglXQueryMaxSwapBarriersSGIX == NULL) + __FETCH_FUNCTION_PTR(QueryMaxSwapBarriersSGIX); + if (pQueryMaxSwapBarriersSGIX == NULL) return False; - return (*pglXQueryMaxSwapBarriersSGIX)(dpy, screen, max); + return (*pQueryMaxSwapBarriersSGIX)(dpy, screen, max); } -static Bool dispatch_glXQueryRendererIntegerMESA(Display *dpy, int screen, +static Bool dispatch_QueryRendererIntegerMESA(Display *dpy, int 
screen, int renderer, int attribute, unsigned int *value) { - PFNGLXQUERYRENDERERINTEGERMESAPROC pglXQueryRendererIntegerMESA; + PFNGLXQUERYRENDERERINTEGERMESAPROC pQueryRendererIntegerMESA; __GLXvendorInfo *dd; dd = __VND->getDynDispatch(dpy, screen); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXQueryRendererIntegerMESA); - if (pglXQueryRendererIntegerMESA == NULL) + __FETCH_FUNCTION_PTR(QueryRendererIntegerMESA); + if (pQueryRendererIntegerMESA == NULL) return False; - return (*pglXQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value); + return (*pQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value); } -static const char *dispatch_glXQueryRendererStringMESA(Display *dpy, int screen, +static const char *dispatch_QueryRendererStringMESA(Display *dpy, int screen, int renderer, int attribute) { - PFNGLXQUERYRENDERERSTRINGMESAPROC pglXQueryRendererStringMESA; + PFNGLXQUERYRENDERERSTRINGMESAPROC pQueryRendererStringMESA; __GLXvendorInfo *dd = NULL; dd = __VND->getDynDispatch(dpy, screen); if (dd == NULL) return NULL; - __FETCH_FUNCTION_PTR(glXQueryRendererStringMESA); - if (pglXQueryRendererStringMESA == NULL) + __FETCH_FUNCTION_PTR(QueryRendererStringMESA); + if (pQueryRendererStringMESA == NULL) return NULL; - return (*pglXQueryRendererStringMESA)(dpy, screen, renderer, attribute); + return (*pQueryRendererStringMESA)(dpy, screen, renderer, attribute); } -static Bool dispatch_glXReleaseBuffersMESA(Display *dpy, GLXDrawable d) +static Bool dispatch_ReleaseBuffersMESA(Display *dpy, GLXDrawable d) { - PFNGLXRELEASEBUFFERSMESAPROC pglXReleaseBuffersMESA; + PFNGLXRELEASEBUFFERSMESAPROC pReleaseBuffersMESA; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, d); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXReleaseBuffersMESA); - if (pglXReleaseBuffersMESA == NULL) + __FETCH_FUNCTION_PTR(ReleaseBuffersMESA); + if (pReleaseBuffersMESA == NULL) return False; - return (*pglXReleaseBuffersMESA)(dpy, d); + return (*pReleaseBuffersMESA)(dpy, d); } -static int64_t dispatch_glXSwapBuffersMscOML(Display *dpy, GLXDrawable drawable, +static int64_t dispatch_SwapBuffersMscOML(Display *dpy, GLXDrawable drawable, int64_t target_msc, int64_t divisor, int64_t remainder) { - PFNGLXSWAPBUFFERSMSCOMLPROC pglXSwapBuffersMscOML; + PFNGLXSWAPBUFFERSMSCOMLPROC pSwapBuffersMscOML; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return 0; - __FETCH_FUNCTION_PTR(glXSwapBuffersMscOML); - if (pglXSwapBuffersMscOML == NULL) + __FETCH_FUNCTION_PTR(SwapBuffersMscOML); + if (pSwapBuffersMscOML == NULL) return 0; - return (*pglXSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder); + return (*pSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder); } -static int dispatch_glXSwapIntervalMESA(unsigned int interval) +static int dispatch_SwapIntervalMESA(unsigned int interval) { - PFNGLXSWAPINTERVALMESAPROC pglXSwapIntervalMESA; + PFNGLXSWAPINTERVALMESAPROC pSwapIntervalMESA; __GLXvendorInfo *dd; if (!__VND->getCurrentContext()) @@ -872,52 +871,52 @@ static int dispatch_glXSwapIntervalMESA(unsigned int interval) if (dd == NULL) return 0; - __FETCH_FUNCTION_PTR(glXSwapIntervalMESA); - if (pglXSwapIntervalMESA == NULL) + __FETCH_FUNCTION_PTR(SwapIntervalMESA); + if (pSwapIntervalMESA == NULL) return 0; - return (*pglXSwapIntervalMESA)(interval); + return (*pSwapIntervalMESA)(interval); } -static Bool dispatch_glXWaitForMscOML(Display *dpy, GLXDrawable drawable, +static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable 
drawable, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc) { - PFNGLXWAITFORMSCOMLPROC pglXWaitForMscOML; + PFNGLXWAITFORMSCOMLPROC pWaitForMscOML; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXWaitForMscOML); - if (pglXWaitForMscOML == NULL) + __FETCH_FUNCTION_PTR(WaitForMscOML); + if (pWaitForMscOML == NULL) return False; - return (*pglXWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc); + return (*pWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc); } -static Bool dispatch_glXWaitForSbcOML(Display *dpy, GLXDrawable drawable, +static Bool dispatch_WaitForSbcOML(Display *dpy, GLXDrawable drawable, int64_t target_sbc, int64_t *ust, int64_t *msc, int64_t *sbc) { - PFNGLXWAITFORSBCOMLPROC pglXWaitForSbcOML; + PFNGLXWAITFORSBCOMLPROC pWaitForSbcOML; __GLXvendorInfo *dd; dd = GetDispatchFromDrawable(dpy, drawable); if (dd == NULL) return False; - __FETCH_FUNCTION_PTR(glXWaitForSbcOML); - if (pglXWaitForSbcOML == NULL) + __FETCH_FUNCTION_PTR(WaitForSbcOML); + if (pWaitForSbcOML == NULL) return False; - return (*pglXWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc); + return (*pWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc); } #undef __FETCH_FUNCTION_PTR @@ -928,45 +927,44 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = { #define __ATTRIB(field) \ [DI_##field] = (void *)dispatch_##field - __ATTRIB(BindTexImageEXT), + __ATTRIB(BindSwapBarrierSGIX), __ATTRIB(BindTexImageEXT), __ATTRIB(ChooseFBConfigSGIX), + __ATTRIB(CopySubBufferMESA), __ATTRIB(CreateContextAttribsARB), __ATTRIB(CreateContextWithConfigSGIX), __ATTRIB(CreateGLXPbufferSGIX), + __ATTRIB(CreateGLXPixmapMESA), __ATTRIB(CreateGLXPixmapWithConfigSGIX), __ATTRIB(DestroyGLXPbufferSGIX), __ATTRIB(GetContextIDEXT), __ATTRIB(GetCurrentDisplayEXT), __ATTRIB(GetFBConfigAttribSGIX), __ATTRIB(GetFBConfigFromVisualSGIX), + __ATTRIB(GetMscRateOML), + __ATTRIB(GetScreenDriver), __ATTRIB(GetSelectedEventSGIX), + __ATTRIB(GetSwapIntervalMESA), + __ATTRIB(GetSyncValuesOML), __ATTRIB(GetVideoSyncSGI), __ATTRIB(GetVisualFromFBConfigSGIX), + __ATTRIB(JoinSwapGroupSGIX), __ATTRIB(QueryContextInfoEXT), + __ATTRIB(QueryCurrentRendererIntegerMESA), + __ATTRIB(QueryCurrentRendererStringMESA), __ATTRIB(QueryGLXPbufferSGIX), + __ATTRIB(QueryMaxSwapBarriersSGIX), + __ATTRIB(QueryRendererIntegerMESA), + __ATTRIB(QueryRendererStringMESA), + __ATTRIB(ReleaseBuffersMESA), __ATTRIB(ReleaseTexImageEXT), __ATTRIB(SelectEventSGIX), + __ATTRIB(SwapBuffersMscOML), + __ATTRIB(SwapIntervalMESA), __ATTRIB(SwapIntervalSGI), + __ATTRIB(WaitForMscOML), + __ATTRIB(WaitForSbcOML), __ATTRIB(WaitVideoSyncSGI), - __ATTRIB(glXBindSwapBarrierSGIX), - __ATTRIB(glXCopySubBufferMESA), - __ATTRIB(glXCreateGLXPixmapMESA), - __ATTRIB(glXGetMscRateOML), - __ATTRIB(glXGetScreenDriver), - __ATTRIB(glXGetSwapIntervalMESA), - __ATTRIB(glXGetSyncValuesOML), - __ATTRIB(glXJoinSwapGroupSGIX), - __ATTRIB(glXQueryCurrentRendererIntegerMESA), - __ATTRIB(glXQueryCurrentRendererStringMESA), - __ATTRIB(glXQueryMaxSwapBarriersSGIX), - __ATTRIB(glXQueryRendererIntegerMESA), - __ATTRIB(glXQueryRendererStringMESA), - __ATTRIB(glXReleaseBuffersMESA), - __ATTRIB(glXSwapBuffersMscOML), - __ATTRIB(glXSwapIntervalMESA), - __ATTRIB(glXWaitForMscOML), - __ATTRIB(glXWaitForSbcOML), [DI_LAST_INDEX] = NULL, #undef __ATTRIB diff --git a/src/glx/g_glxglvnddispatchindices.h b/src/glx/g_glxglvnddispatchindices.h 
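The renames above (dispatch_glX* to dispatch_*, DI_glX* to DI_*) matter because of how the __ATTRIB initializer pastes tokens together. A compilable sketch of that expansion follows; the Bool typedef, the stand-in stub and the table name are illustrative only, not code from the tree:

    typedef int Bool;
    enum { DI_GetSyncValuesOML, DI_LAST_INDEX };

    /* Stand-in for the real stub defined in the dispatch file above. */
    static Bool dispatch_GetSyncValuesOML(void) { return 0; }

    /* Same shape as the initializer macro in __glXDispatchFunctions: */
    #define __ATTRIB(field) [DI_##field] = (void *)dispatch_##field

    static const void *const table[DI_LAST_INDEX + 1] = {
        __ATTRIB(GetSyncValuesOML),
        /* expands to: [DI_GetSyncValuesOML] = (void *)dispatch_GetSyncValuesOML,
         * which is why the stubs and the DI_* enum drop the "glX" prefix in
         * lock-step: a single bare token must name both the index and the
         * function, and the two lists can then stay in matching order. */
        [DI_LAST_INDEX] = NULL,
    };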
index fd2156e..0891654 100644 --- a/src/glx/g_glxglvnddispatchindices.h +++ b/src/glx/g_glxglvnddispatchindices.h @@ -6,16 +6,19 @@ #define __glxlibglvnd_dispatchindex_h__ typedef enum __GLXdispatchIndex { + DI_BindSwapBarrierSGIX, DI_BindTexImageEXT, // ChooseFBConfig implemented by libglvnd DI_ChooseFBConfigSGIX, // ChooseVisual implemented by libglvnd // CopyContext implemented by libglvnd + DI_CopySubBufferMESA, // CreateContext implemented by libglvnd DI_CreateContextAttribsARB, DI_CreateContextWithConfigSGIX, DI_CreateGLXPbufferSGIX, // CreateGLXPixmap implemented by libglvnd + DI_CreateGLXPixmapMESA, DI_CreateGLXPixmapWithConfigSGIX, // CreateNewContext implemented by libglvnd // CreatePbuffer implemented by libglvnd @@ -40,6 +43,7 @@ typedef enum __GLXdispatchIndex { DI_GetFBConfigAttribSGIX, DI_GetFBConfigFromVisualSGIX, // GetFBConfigs implemented by libglvnd + DI_GetMscRateOML, // GetProcAddress implemented by libglvnd // GetProcAddressARB implemented by libglvnd // GetSelectedEvent implemented by libglvnd @@ -47,45 +51,41 @@ typedef enum __GLXdispatchIndex { DI_GetVideoSyncSGI, // GetVisualFromFBConfig implemented by libglvnd DI_GetVisualFromFBConfigSGIX, + DI_GetScreenDriver, + DI_GetSwapIntervalMESA, + DI_GetSyncValuesOML, // ImportContextEXT implemented by libglvnd // IsDirect implemented by libglvnd + DI_JoinSwapGroupSGIX, // MakeContextCurrent implemented by libglvnd // MakeCurrent implemented by libglvnd // QueryContext implemented by libglvnd DI_QueryContextInfoEXT, + DI_QueryCurrentRendererIntegerMESA, + DI_QueryCurrentRendererStringMESA, // QueryDrawable implemented by libglvnd // QueryExtension implemented by libglvnd // QueryExtensionsString implemented by libglvnd DI_QueryGLXPbufferSGIX, + DI_QueryMaxSwapBarriersSGIX, + DI_QueryRendererIntegerMESA, + DI_QueryRendererStringMESA, // QueryServerString implemented by libglvnd // QueryVersion implemented by libglvnd + DI_ReleaseBuffersMESA, DI_ReleaseTexImageEXT, // SelectEvent implemented by libglvnd DI_SelectEventSGIX, // SwapBuffers implemented by libglvnd + DI_SwapBuffersMscOML, + DI_SwapIntervalMESA, DI_SwapIntervalSGI, // UseXFont implemented by libglvnd // WaitGL implemented by libglvnd + DI_WaitForMscOML, + DI_WaitForSbcOML, DI_WaitVideoSyncSGI, // WaitX implemented by libglvnd - DI_glXBindSwapBarrierSGIX, - DI_glXCopySubBufferMESA, - DI_glXCreateGLXPixmapMESA, - DI_glXGetMscRateOML, - DI_glXGetScreenDriver, - DI_glXGetSwapIntervalMESA, - DI_glXGetSyncValuesOML, - DI_glXJoinSwapGroupSGIX, - DI_glXQueryCurrentRendererIntegerMESA, - DI_glXQueryCurrentRendererStringMESA, - DI_glXQueryMaxSwapBarriersSGIX, - DI_glXQueryRendererIntegerMESA, - DI_glXQueryRendererStringMESA, - DI_glXReleaseBuffersMESA, - DI_glXSwapBuffersMscOML, - DI_glXSwapIntervalMESA, - DI_glXWaitForMscOML, - DI_glXWaitForSbcOML, DI_LAST_INDEX } __GLXdispatchIndex; diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 6abe0b9..8980de3 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -2713,7 +2713,7 @@ __glXGetUST(int64_t * ust) #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) -int +PUBLIC int MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context, struct mesa_glinterop_device_info *out) { @@ -2737,7 +2737,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context, return ret; } -int +PUBLIC int MesaGLInteropGLXExportObject(Display *dpy, GLXContext context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out) diff --git a/src/glx/glxglvnd.c b/src/glx/glxglvnd.c index 098304d..2fc9b00 
100644 --- a/src/glx/glxglvnd.c +++ b/src/glx/glxglvnd.c @@ -50,6 +50,9 @@ static void __glXGLVNDSetDispatchIndex(const GLubyte *procName, int index) { unsigned internalIndex = FindGLXFunction(procName); + if (internalIndex == DI_FUNCTION_COUNT) + return; /* unknown or static dispatch */ + __glXDispatchTableIndices[internalIndex] = index; } diff --git a/src/glx/windows/Makefile.am b/src/glx/windows/Makefile.am index c76af81..9806988 100644 --- a/src/glx/windows/Makefile.am +++ b/src/glx/windows/Makefile.am @@ -16,7 +16,8 @@ libwindowsglx_la_SOURCES = \ windowsgl.h \ windowsgl_internal.h \ windows_drawable.c \ - wgl.c + wgl.c \ + wgl.h libwindowsglx_la_CFLAGS = \ -I$(top_srcdir)/include \ diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index 84f3d82..d9ec1c4 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -169,7 +169,7 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, - struct brw_blorp_prog_data *prog_data, + struct brw_wm_prog_data *wm_prog_data, unsigned *program_size) { const struct brw_compiler *compiler = blorp->compiler; @@ -177,15 +177,14 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, nir->options = compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; - struct brw_wm_prog_data wm_prog_data; - memset(&wm_prog_data, 0, sizeof(wm_prog_data)); + memset(wm_prog_data, 0, sizeof(*wm_prog_data)); - wm_prog_data.base.nr_params = 0; - wm_prog_data.base.param = NULL; + wm_prog_data->base.nr_params = 0; + wm_prog_data->base.param = NULL; /* BLORP always just uses the first two binding table entries */ - wm_prog_data.binding_table.render_target_start = BLORP_RENDERBUFFER_BT_INDEX; - wm_prog_data.base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; + wm_prog_data->binding_table.render_target_start = BLORP_RENDERBUFFER_BT_INDEX; + wm_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; nir = brw_preprocess_nir(compiler, nir); nir_remove_dead_variables(nir, nir_var_shader_in); @@ -206,22 +205,9 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, const unsigned *program = brw_compile_fs(compiler, blorp->driver_ctx, mem_ctx, - wm_key, &wm_prog_data, nir, + wm_key, wm_prog_data, nir, NULL, -1, -1, false, use_repclear, program_size, NULL); - /* Copy the relavent bits of wm_prog_data over into the blorp prog data */ - prog_data->dispatch_8 = wm_prog_data.dispatch_8; - prog_data->dispatch_16 = wm_prog_data.dispatch_16; - prog_data->first_curbe_grf_0 = wm_prog_data.base.dispatch_grf_start_reg; - prog_data->first_curbe_grf_2 = wm_prog_data.dispatch_grf_start_reg_2; - prog_data->ksp_offset_2 = wm_prog_data.prog_offset_2; - prog_data->persample_msaa_dispatch = wm_prog_data.persample_dispatch; - prog_data->flat_inputs = wm_prog_data.flat_inputs; - prog_data->num_varying_inputs = wm_prog_data.num_varying_inputs; - prog_data->inputs_read = nir->info.inputs_read; - - assert(wm_prog_data.base.nr_params == 0); - return program; } diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index 0c64d13..4351cb1 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -30,7 +30,7 @@ #include "isl/isl.h" struct brw_context; -struct brw_wm_prog_key; +struct brw_stage_prog_data; #ifdef __cplusplus extern "C" { @@ -58,7 +58,8 @@ struct blorp_context { void (*upload_shader)(struct blorp_context *blorp, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, - const void *prog_data, 
uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out); void (*exec)(struct blorp_batch *batch, const struct blorp_params *params); }; diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index cbccfc7..018d997 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1237,7 +1237,7 @@ brw_blorp_get_blit_kernel(struct blorp_context *blorp, const unsigned *program; unsigned program_size; - struct brw_blorp_prog_data prog_data; + struct brw_wm_prog_data prog_data; /* Try and compile with NIR first. If that fails, fall back to the old * method of building shaders manually. @@ -1255,7 +1255,7 @@ brw_blorp_get_blit_kernel(struct blorp_context *blorp, blorp->upload_shader(blorp, prog_key, sizeof(*prog_key), program, program_size, - &prog_data, sizeof(prog_data), + &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); ralloc_free(mem_ctx); diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 7ed2808..7e5015a 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -74,7 +74,7 @@ blorp_params_get_clear_kernel(struct blorp_context *blorp, struct brw_wm_prog_key wm_key; brw_blorp_init_wm_prog_key(&wm_key); - struct brw_blorp_prog_data prog_data; + struct brw_wm_prog_data prog_data; unsigned program_size; const unsigned *program = blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, use_replicated_data, @@ -82,7 +82,7 @@ blorp_params_get_clear_kernel(struct blorp_context *blorp, blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), program, program_size, - &prog_data, sizeof(prog_data), + &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); ralloc_free(mem_ctx); diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index ec0d022..07c335a 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -207,7 +207,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, for (unsigned i = 0; i < max_num_varyings; i++) { const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; - if (!(params->wm_prog_data->inputs_read & (1ull << attr))) + const int input_index = params->wm_prog_data->urb_setup[attr]; + if (input_index < 0) continue; memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); @@ -401,7 +402,7 @@ static void blorp_emit_sf_config(struct blorp_batch *batch, const struct blorp_params *params) { - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + const struct brw_wm_prog_data *prog_data = params->wm_prog_data; /* 3DSTATE_SF * @@ -502,7 +503,7 @@ static void blorp_emit_ps_config(struct blorp_batch *batch, const struct blorp_params *params) { - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + const struct brw_wm_prog_data *prog_data = params->wm_prog_data; /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be * nonzero to prevent the GPU from hanging. 
While the documentation doesn't @@ -527,16 +528,16 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (prog_data) { ps.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->first_curbe_grf_0; + prog_data->base.dispatch_grf_start_reg; ps.DispatchGRFStartRegisterForConstantSetupData2 = - prog_data->first_curbe_grf_2; + prog_data->dispatch_grf_start_reg_2; ps._8PixelDispatchEnable = prog_data->dispatch_8; ps._16PixelDispatchEnable = prog_data->dispatch_16; ps.KernelStartPointer0 = params->wm_prog_kernel; ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; + params->wm_prog_kernel + prog_data->prog_offset_2; } /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; @@ -577,7 +578,7 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (prog_data) { psx.PixelShaderValid = true; psx.AttributeEnable = prog_data->num_varying_inputs > 0; - psx.PixelShaderIsPerSample = prog_data->persample_msaa_dispatch; + psx.PixelShaderIsPerSample = prog_data->persample_dispatch; } if (params->src.enabled) @@ -612,7 +613,7 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (params->dst.surf.samples > 1) { wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; wm.MultisampleDispatchMode = - (prog_data && prog_data->persample_msaa_dispatch) ? + (prog_data && prog_data->persample_dispatch) ? MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; } else { wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; @@ -630,13 +631,13 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (prog_data) { ps.DispatchGRFStartRegisterforConstantSetupData0 = - prog_data->first_curbe_grf_0; + prog_data->base.dispatch_grf_start_reg; ps.DispatchGRFStartRegisterforConstantSetupData2 = - prog_data->first_curbe_grf_2; + prog_data->dispatch_grf_start_reg_2; ps.KernelStartPointer0 = params->wm_prog_kernel; ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; + params->wm_prog_kernel + prog_data->prog_offset_2; ps._8PixelDispatchEnable = prog_data->dispatch_8; ps._16PixelDispatchEnable = prog_data->dispatch_16; @@ -692,13 +693,13 @@ blorp_emit_ps_config(struct blorp_batch *batch, wm.ThreadDispatchEnable = true; wm.DispatchGRFStartRegisterforConstantSetupData0 = - prog_data->first_curbe_grf_0; + prog_data->base.dispatch_grf_start_reg; wm.DispatchGRFStartRegisterforConstantSetupData2 = - prog_data->first_curbe_grf_2; + prog_data->dispatch_grf_start_reg_2; wm.KernelStartPointer0 = params->wm_prog_kernel; wm.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; + params->wm_prog_kernel + prog_data->prog_offset_2; wm._8PixelDispatchEnable = prog_data->dispatch_8; wm._16PixelDispatchEnable = prog_data->dispatch_16; @@ -714,7 +715,7 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (params->dst.surf.samples > 1) { wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; wm.MultisampleDispatchMode = - (prog_data && prog_data->persample_msaa_dispatch) ? + (prog_data && prog_data->persample_dispatch) ? 
MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; } else { wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; @@ -1116,6 +1117,11 @@ blorp_emit_surface_states(struct blorp_batch *batch, } #if GEN_GEN >= 7 + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt); + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt); + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_DS), bt); + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_GS), bt); + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { bt.PointertoPSBindingTable = bind_offset; } diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h index 9d14336..710479f 100644 --- a/src/intel/blorp/blorp_priv.h +++ b/src/intel/blorp/blorp_priv.h @@ -138,33 +138,8 @@ struct brw_blorp_wm_inputs uint32_t pad[1]; }; -struct brw_blorp_prog_data -{ - bool dispatch_8; - bool dispatch_16; - - uint8_t first_curbe_grf_0; - uint8_t first_curbe_grf_2; - - uint32_t ksp_offset_2; - - /** - * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more - * than one sample per pixel. - */ - bool persample_msaa_dispatch; - - /** - * Mask of which FS inputs are marked flat by the shader source. This is - * needed for setting up 3DSTATE_SF/SBE. - */ - uint32_t flat_inputs; - unsigned num_varying_inputs; - uint64_t inputs_read; -}; - static inline unsigned -brw_blorp_get_urb_length(const struct brw_blorp_prog_data *prog_data) +brw_blorp_get_urb_length(const struct brw_wm_prog_data *prog_data) { if (prog_data == NULL) return 1; @@ -197,7 +172,7 @@ struct blorp_params unsigned num_draw_buffers; unsigned num_layers; uint32_t wm_prog_kernel; - struct brw_blorp_prog_data *wm_prog_data; + struct brw_wm_prog_data *wm_prog_data; }; void blorp_params_init(struct blorp_params *params); @@ -314,7 +289,7 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, - struct brw_blorp_prog_data *prog_data, + struct brw_wm_prog_data *wm_prog_data, unsigned *program_size); /** \} */ diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c index 30df0b2..1dc1769 100644 --- a/src/intel/common/gen_device_info.c +++ b/src/intel/common/gen_device_info.c @@ -335,7 +335,6 @@ static const struct gen_device_info gen_device_info_chv = { .max_gs_threads = 336, \ .max_tcs_threads = 336, \ .max_tes_threads = 336, \ - .max_wm_threads = 64 * 9, \ .max_cs_threads = 56, \ .urb = { \ .size = 384, \ @@ -388,7 +387,6 @@ static const struct gen_device_info gen_device_info_bxt = { .max_tcs_threads = 112, .max_tes_threads = 112, .max_gs_threads = 112, - .max_wm_threads = 64 * 3, .max_cs_threads = 6 * 6, .urb = { .size = 192, @@ -411,7 +409,6 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = { .max_tcs_threads = 56, /* XXX: guess */ .max_tes_threads = 56, .max_gs_threads = 56, - .max_wm_threads = 64 * 2, .max_cs_threads = 6 * 6, .urb = { .size = 128, @@ -427,18 +424,11 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = { * There's no KBL entry. Using the default SKL (GEN9) GS entries value. */ -/* - * Both SKL and KBL support a maximum of 64 threads per - * Pixel Shader Dispatch (PSD) unit. 
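With brw_blorp_prog_data gone, blorp hands the cache a pointer to the embedded base struct while still passing the size of the whole brw_wm_prog_data ("&prog_data.base, sizeof(prog_data)" above). A small self-contained sketch of that embed-the-base-first idiom; the struct and function names here are made up for illustration:

    #include <stddef.h>
    #include <string.h>

    struct base_prog_data { unsigned nr_params; };           /* stands in for brw_stage_prog_data */
    struct wm_prog_data   { struct base_prog_data base;      /* must be the first member */
                            unsigned dispatch_grf_start_reg_2; };

    /* A consumer that only understands the base type can still copy the whole
     * derived struct, because &wm->base and wm share the same address. */
    static void upload(const struct base_prog_data *pd, size_t size, void *out)
    {
        memcpy(out, pd, size);
    }

    int main(void)
    {
        struct wm_prog_data wm = { { 4 }, 2 }, copy;
        upload(&wm.base, sizeof(wm), &copy);   /* mirrors "&prog_data.base, sizeof(prog_data)" */
        return copy.base.nr_params == 4 ? 0 : 1;
    }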
- */ -#define KBL_MAX_THREADS_PER_PSD 64 - static const struct gen_device_info gen_device_info_kbl_gt1 = { GEN9_FEATURES, .gt = 1, .max_cs_threads = 7 * 6, - .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2, .urb.size = 192, .num_slices = 1, }; @@ -448,7 +438,6 @@ static const struct gen_device_info gen_device_info_kbl_gt1_5 = { .gt = 1, .max_cs_threads = 7 * 6, - .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, .num_slices = 1, }; @@ -456,7 +445,6 @@ static const struct gen_device_info gen_device_info_kbl_gt2 = { GEN9_FEATURES, .gt = 2, - .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, .num_slices = 1, }; @@ -464,7 +452,6 @@ static const struct gen_device_info gen_device_info_kbl_gt3 = { GEN9_FEATURES, .gt = 3, - .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6, .num_slices = 2, }; @@ -472,7 +459,6 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = { GEN9_FEATURES, .gt = 4, - .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9, /* * From the "L3 Allocation and Programming" documentation: * @@ -500,6 +486,25 @@ gen_get_device_info(int devid, struct gen_device_info *devinfo) return false; } + /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: + * + * "Scratch Space per slice is computed based on 4 sub-slices. SW must + * allocate scratch space enough so that each slice has 4 slices allowed." + * + * The equivalent internal documentation says that this programming note + * applies to all Gen9+ platforms. + * + * The hardware typically calculates the scratch space pointer by taking + * the base address, and adding per-thread-scratch-space * thread ID. + * Extra padding can be necessary depending how the thread IDs are + * calculated for a particular shader stage. + */ + if (devinfo->gen >= 9) { + devinfo->max_wm_threads = 64 /* threads-per-PSD */ + * devinfo->num_slices + * 4; /* effective subslices per slice */ + } + return true; } diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index ae18f8e..204c871 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -253,10 +253,7 @@ anv_block_pool_init(struct anv_block_pool *pool, assert(util_is_power_of_two(block_size)); pool->device = device; - pool->bo.gem_handle = 0; - pool->bo.offset = 0; - pool->bo.size = 0; - pool->bo.is_winsys_bo = false; + anv_bo_init(&pool->bo, 0, 0); pool->block_size = block_size; pool->free_list = ANV_FREE_LIST_EMPTY; pool->back_free_list = ANV_FREE_LIST_EMPTY; @@ -463,10 +460,8 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) * values back into pool. 
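After the per-SKU max_wm_threads constants are removed, gen_get_device_info derives the value for gen9+ as 64 threads per PSD times the slice count times 4 effective subslices per slice, so a 1-slice part gets 64 * 1 * 4 = 256 and a 3-slice part gets 64 * 3 * 4 = 768. A one-function sketch of that arithmetic (the helper name is illustrative, not in the tree):

    /* Mirrors the gen >= 9 computation in gen_get_device_info(). */
    unsigned gen9_max_wm_threads(unsigned num_slices)
    {
        const unsigned threads_per_psd     = 64;  /* per Pixel Shader Dispatch unit */
        const unsigned subslices_per_slice = 4;   /* padding assumed by the PRM note above */
        return threads_per_psd * num_slices * subslices_per_slice;
    }
    /* gen9_max_wm_threads(1) == 256, gen9_max_wm_threads(3) == 768 */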
*/ pool->map = map + center_bo_offset; pool->center_bo_offset = center_bo_offset; - pool->bo.gem_handle = gem_handle; - pool->bo.size = size; + anv_bo_init(&pool->bo, gem_handle, size); pool->bo.map = map; - pool->bo.index = 0; done: pthread_mutex_unlock(&pool->device->mutex); @@ -892,9 +887,9 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool { for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { for (unsigned i = 0; i < 16; i++) { - struct anv_bo *bo = &pool->bos[i][s]; - if (bo->size > 0) - anv_gem_close(device, bo->gem_handle); + struct anv_scratch_bo *bo = &pool->bos[i][s]; + if (bo->exists > 0) + anv_gem_close(device, bo->bo.gem_handle); } } } @@ -909,70 +904,59 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048); assert(scratch_size_log2 < 16); - struct anv_bo *bo = &pool->bos[scratch_size_log2][stage]; + struct anv_scratch_bo *bo = &pool->bos[scratch_size_log2][stage]; - /* From now on, we go into a critical section. In order to remain - * thread-safe, we use the bo size as a lock. A value of 0 means we don't - * have a valid BO yet. A value of 1 means locked. A value greater than 1 - * means we have a bo of the given size. - */ + /* We can use "exists" to shortcut and ignore the critical section */ + if (bo->exists) + return &bo->bo; - if (bo->size > 1) - return bo; - - uint64_t size = __sync_val_compare_and_swap(&bo->size, 0, 1); - if (size == 0) { - /* We own the lock. Allocate a buffer */ - - const struct anv_physical_device *physical_device = - &device->instance->physicalDevice; - const struct gen_device_info *devinfo = &physical_device->info; - - /* WaCSScratchSize:hsw - * - * Haswell's scratch space address calculation appears to be sparse - * rather than tightly packed. The Thread ID has bits indicating which - * subslice, EU within a subslice, and thread within an EU it is. - * There's a maximum of two slices and two subslices, so these can be - * stored with a single bit. Even though there are only 10 EUs per - * subslice, this is stored in 4 bits, so there's an effective maximum - * value of 16 EUs. Similarly, although there are only 7 threads per EU, - * this is stored in a 3 bit number, giving an effective maximum value - * of 8 threads per EU. - * - * This means that we need to use 16 * 8 instead of 10 * 7 for the - * number of threads per subslice. - */ - const unsigned subslices = MAX2(physical_device->subslice_total, 1); - const unsigned scratch_ids_per_subslice = - device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads; + pthread_mutex_lock(&device->mutex); + + __sync_synchronize(); + if (bo->exists) + return &bo->bo; - uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, - [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices, - }; + const struct anv_physical_device *physical_device = + &device->instance->physicalDevice; + const struct gen_device_info *devinfo = &physical_device->info; + + /* WaCSScratchSize:hsw + * + * Haswell's scratch space address calculation appears to be sparse + * rather than tightly packed. The Thread ID has bits indicating which + * subslice, EU within a subslice, and thread within an EU it is. 
+ * There's a maximum of two slices and two subslices, so these can be + * stored with a single bit. Even though there are only 10 EUs per + * subslice, this is stored in 4 bits, so there's an effective maximum + * value of 16 EUs. Similarly, although there are only 7 threads per EU, + * this is stored in a 3 bit number, giving an effective maximum value + * of 8 threads per EU. + * + * This means that we need to use 16 * 8 instead of 10 * 7 for the + * number of threads per subslice. + */ + const unsigned subslices = MAX2(physical_device->subslice_total, 1); + const unsigned scratch_ids_per_subslice = + device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads; - size = per_thread_scratch * max_threads[stage]; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, + [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices, + }; - struct anv_bo new_bo; - anv_bo_init_new(&new_bo, device, size); + uint32_t size = per_thread_scratch * max_threads[stage]; - bo->gem_handle = new_bo.gem_handle; + anv_bo_init_new(&bo->bo, device, size); - /* Set the size last because we use it as a lock */ - __sync_synchronize(); - bo->size = size; + /* Set the exists last because it may be read by other threads */ + __sync_synchronize(); + bo->exists = true; - futex_wake((uint32_t *)&bo->size, INT_MAX); - } else { - /* Someone else got here first */ - while (bo->size == 1) - futex_wait((uint32_t *)&bo->size, 1); - } + pthread_mutex_unlock(&device->mutex); - return bo; + return &bo->bo; } diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index dfa9abf..b49e173 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -32,6 +32,8 @@ #include "genxml/gen7_pack.h" #include "genxml/gen8_pack.h" +#include "util/debug.h" + /** \file anv_batch_chain.c * * This file contains functions related to anv_cmd_buffer as a data @@ -297,8 +299,6 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, bbo->length = other_bbo->length; memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); - bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; - *bbo_out = bbo; return VK_SUCCESS; @@ -318,7 +318,6 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, batch->next = batch->start = bbo->bo.map; batch->end = bbo->bo.map + bbo->bo.size - batch_padding; batch->relocs = &bbo->relocs; - bbo->last_ss_pool_bo_offset = 0; bbo->relocs.num_relocs = 0; } @@ -620,13 +619,10 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->pool->alloc); if (result != VK_SUCCESS) goto fail_bt_blocks; + cmd_buffer->last_ss_pool_center = 0; anv_cmd_buffer_new_binding_table_block(cmd_buffer); - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; - return VK_SUCCESS; fail_bt_blocks: @@ -658,9 +654,6 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->batch_bos, link) { anv_batch_bo_destroy(bbo, cmd_buffer); } - - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); } void @@ -688,6 +681,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->bt_next = 0; cmd_buffer->surface_relocs.num_relocs = 0; 
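The scratch-pool change above swaps the old trick of using bo->size as a futex-based lock for a plain "exists" flag that is checked once without the lock as a fast path and re-checked under device->mutex before allocating. A minimal, self-contained sketch of that double-checked pattern; lazy_bo, dev_mutex and lazy_bo_get are illustrative names only, not anything in the tree:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct lazy_bo {
        bool  exists;   /* written last, after the buffer is fully set up */
        void *data;
    };

    static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void *lazy_bo_get(struct lazy_bo *bo, size_t size)
    {
        if (bo->exists)                    /* fast path: no lock once initialized */
            return bo->data;

        pthread_mutex_lock(&dev_mutex);
        if (!bo->exists) {                 /* re-check: another thread may have won the race */
            bo->data = malloc(size);
            __sync_synchronize();          /* publish the data before the flag */
            bo->exists = true;
        }
        pthread_mutex_unlock(&dev_mutex);
        return bo->data;
    }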
+ cmd_buffer->last_ss_pool_center = 0; /* Reset the list of seen buffers */ cmd_buffer->seen_bbos.head = 0; @@ -857,56 +851,83 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, &secondary->surface_relocs, 0); } +struct anv_execbuf { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; +}; + +static void +anv_execbuf_init(struct anv_execbuf *exec) +{ + memset(exec, 0, sizeof(*exec)); +} + +static void +anv_execbuf_finish(struct anv_execbuf *exec, + const VkAllocationCallbacks *alloc) +{ + vk_free(alloc, exec->objects); + vk_free(alloc, exec->bos); +} + static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct anv_reloc_list *relocs) +anv_execbuf_add_bo(struct anv_execbuf *exec, + struct anv_bo *bo, + struct anv_reloc_list *relocs, + const VkAllocationCallbacks *alloc) { struct drm_i915_gem_exec_object2 *obj = NULL; - if (bo->index < cmd_buffer->execbuf2.bo_count && - cmd_buffer->execbuf2.bos[bo->index] == bo) - obj = &cmd_buffer->execbuf2.objects[bo->index]; + if (bo->index < exec->bo_count && exec->bos[bo->index] == bo) + obj = &exec->objects[bo->index]; if (obj == NULL) { /* We've never seen this one before. Add it to the list and assign * an id that we can use later. */ - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; + if (exec->bo_count >= exec->array_length) { + uint32_t new_len = exec->objects ? exec->array_length * 2 : 64; struct drm_i915_gem_exec_object2 *new_objects = - vk_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + vk_alloc(alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (new_objects == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); struct anv_bo **new_bos = - vk_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + vk_alloc(alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (new_bos == NULL) { - vk_free(&cmd_buffer->pool->alloc, new_objects); + vk_free(alloc, new_objects); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + if (exec->objects) { + memcpy(new_objects, exec->objects, + exec->bo_count * sizeof(*new_objects)); + memcpy(new_bos, exec->bos, + exec->bo_count * sizeof(*new_bos)); } - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; + vk_free(alloc, exec->objects); + vk_free(alloc, exec->bos); + + exec->objects = new_objects; + exec->bos = new_bos; + exec->array_length = new_len; } - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + assert(exec->bo_count < exec->array_length); - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; + bo->index = exec->bo_count++; + obj = &exec->objects[bo->index]; + exec->bos[bo->index] = bo; obj->handle = bo->gem_handle; obj->relocation_count = 0; @@ -929,7 +950,7 @@ 
anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, for (size_t i = 0; i < relocs->num_relocs; i++) { /* A quick sanity check on relocations */ assert(relocs->relocs[i].offset < bo->size); - anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL, alloc); } } @@ -940,82 +961,62 @@ static void anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, struct anv_reloc_list *list) { - struct anv_bo *bo; - - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. - */ - - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->execbuf2.need_reloc = true; - - list->relocs[i].target_handle = bo->index; - } -} - -static uint64_t -read_reloc(const struct anv_device *device, const void *p) -{ - if (device->info.gen >= 8) - return *(uint64_t *)p; - else - return *(uint32_t *)p; + for (size_t i = 0; i < list->num_relocs; i++) + list->relocs[i].target_handle = list->reloc_bos[i]->index; } static void -write_reloc(const struct anv_device *device, void *p, uint64_t v) +write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush) { - if (device->info.gen >= 8) - *(uint64_t *)p = v; - else + unsigned reloc_size = 0; + if (device->info.gen >= 8) { + /* From the Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress: + * + * "This field specifies the address of the memory location where the + * register value specified in the DWord above will read from. The + * address specifies the DWord location of the data. Range = + * GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress + * [63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." + */ + const int shift = 63 - 47; + reloc_size = sizeof(uint64_t); + *(uint64_t *)p = (((int64_t)v) << shift) >> shift; + } else { + reloc_size = sizeof(uint32_t); *(uint32_t *)p = v; + } + + if (flush && !device->info.has_llc) + anv_clflush_range(p, reloc_size); } static void -adjust_relocations_from_block_pool(struct anv_block_pool *pool, - struct anv_reloc_list *relocs) +adjust_relocations_from_state_pool(struct anv_block_pool *pool, + struct anv_reloc_list *relocs, + uint32_t last_pool_center_bo_offset) { - for (size_t i = 0; i < relocs->num_relocs; i++) { - /* In general, we don't know how stale the relocated value is. It - * may have been used last time or it may not. Since we don't want - * to stomp it while the GPU may be accessing it, we haven't updated - * it anywhere else in the code. Instead, we just set the presumed - * offset to what it is now based on the delta and the data in the - * block pool. Then the kernel will update it for us if needed. - */ - assert(relocs->relocs[i].offset < pool->state.end); - const void *p = pool->map + relocs->relocs[i].offset; - - /* We're reading back the relocated value from potentially incoherent - * memory here. However, any change to the value will be from the kernel - * writing out relocations, which will keep the CPU cache up to date. 
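The new write_reloc above writes 48-bit addresses in canonical form, i.e. bits 63:48 must replicate bit 47, by shifting left 16 (63 - 47) and arithmetically shifting back. A standalone illustration of that sign-extension, which like the original relies on an arithmetic right shift of signed values:

    #include <assert.h>
    #include <stdint.h>

    /* Replicate bit 47 of a 48-bit GPU address into bits 63:48. */
    static uint64_t to_canonical(uint64_t v)
    {
        const int shift = 63 - 47;
        return (uint64_t)((int64_t)(v << shift) >> shift);
    }

    int main(void)
    {
        assert(to_canonical(0x00007fffdeadbeefull) == 0x00007fffdeadbeefull); /* bit 47 clear */
        assert(to_canonical(0x0000800000000000ull) == 0xffff800000000000ull); /* bit 47 set  */
        return 0;
    }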
- */ - relocs->relocs[i].presumed_offset = - read_reloc(pool->device, p) - relocs->relocs[i].delta; + assert(last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - last_pool_center_bo_offset; + for (size_t i = 0; i < relocs->num_relocs; i++) { /* All of the relocations from this block pool to other BO's should * have been emitted relative to the surface block pool center. We * need to add the center offset to make them relative to the * beginning of the actual GEM bo. */ - relocs->relocs[i].offset += pool->center_bo_offset; + relocs->relocs[i].offset += delta; } } static void -adjust_relocations_to_block_pool(struct anv_block_pool *pool, +adjust_relocations_to_state_pool(struct anv_block_pool *pool, struct anv_bo *from_bo, struct anv_reloc_list *relocs, - uint32_t *last_pool_center_bo_offset) + uint32_t last_pool_center_bo_offset) { - assert(*last_pool_center_bo_offset <= pool->center_bo_offset); - uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; + assert(last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - last_pool_center_bo_offset; /* When we initially emit relocations into a block pool, we don't * actually know what the final center_bo_offset will be so we just emit @@ -1040,37 +1041,147 @@ adjust_relocations_to_block_pool(struct anv_block_pool *pool, assert(relocs->relocs[i].offset < from_bo->size); write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, relocs->relocs[i].presumed_offset + - relocs->relocs[i].delta); + relocs->relocs[i].delta, false); } } +} - *last_pool_center_bo_offset = pool->center_bo_offset; +static void +anv_reloc_list_apply(struct anv_device *device, + struct anv_reloc_list *list, + struct anv_bo *bo, + bool always_relocate) +{ + for (size_t i = 0; i < list->num_relocs; i++) { + struct anv_bo *target_bo = list->reloc_bos[i]; + if (list->relocs[i].presumed_offset == target_bo->offset && + !always_relocate) + continue; + + void *p = bo->map + list->relocs[i].offset; + write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true); + list->relocs[i].presumed_offset = target_bo->offset; + } } -void -anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +/** + * This function applies the relocation for a command buffer and writes the + * actual addresses into the buffers as per what we were told by the kernel on + * the previous execbuf2 call. This should be safe to do because, for each + * relocated address, we have two cases: + * + * 1) The target BO is inactive (as seen by the kernel). In this case, it is + * not in use by the GPU so updating the address is 100% ok. It won't be + * in-use by the GPU (from our context) again until the next execbuf2 + * happens. If the kernel decides to move it in the next execbuf2, it + * will have to do the relocations itself, but that's ok because it should + * have all of the information needed to do so. + * + * 2) The target BO is active (as seen by the kernel). In this case, it + * hasn't moved since the last execbuffer2 call because GTT shuffling + * *only* happens when the BO is idle. (From our perspective, it only + * happens inside the execbuffer2 ioctl, but the shuffling may be + * triggered by another ioctl, with full-ppgtt this is limited to only + * execbuffer2 ioctls on the same context, or memory pressure.) 
Since the + * target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT + * address and the relocated value we are writing into the BO will be the + * same as the value that is already there. + * + * There is also a possibility that the target BO is active but the exact + * RENDER_SURFACE_STATE object we are writing the relocation into isn't in + * use. In this case, the address currently in the RENDER_SURFACE_STATE + * may be stale but it's still safe to write the relocation because that + * particular RENDER_SURFACE_STATE object isn't in-use by the GPU and + * won't be until the next execbuf2 call. + * + * By doing relocations on the CPU, we can tell the kernel that it doesn't + * need to bother. We want to do this because the surface state buffer is + * used by every command buffer so, if the kernel does the relocations, it + * will always be busy and the kernel will always stall. This is also + * probably the fastest mechanism for doing relocations since the kernel would + * have to make a full copy of all the relocations lists. + */ +static bool +relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer, + struct anv_execbuf *exec) +{ + static int userspace_relocs = -1; + if (userspace_relocs < 0) + userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true); + if (!userspace_relocs) + return false; + + /* First, we have to check to see whether or not we can even do the + * relocation. New buffers which have never been submitted to the kernel + * don't have a valid offset so we need to let the kernel do relocations so + * that we can get offsets for them. On future execbuf2 calls, those + * buffers will have offsets and we will be able to skip relocating. + * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1. + */ + for (uint32_t i = 0; i < exec->bo_count; i++) { + if (exec->bos[i]->offset == (uint64_t)-1) + return false; + } + + /* Since surface states are shared between command buffers and we don't + * know what order they will be submitted to the kernel, we don't know + * what address is actually written in the surface state object at any + * given time. The only option is to always relocate them. + */ + anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs, + &cmd_buffer->device->surface_state_block_pool.bo, + true /* always relocate surface states */); + + /* Since we own all of the batch buffers, we know what values are stored + * in the relocated addresses and only have to update them if the offsets + * have changed. 
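anv_reloc_list_apply above captures the invariant behind userspace relocations: the value patched into the source BO at reloc->offset is target_bo->offset + reloc->delta, and presumed_offset records the address that was used, so a later submit with I915_EXEC_NO_RELOC can let the kernel skip the rewrite. A minimal model of that invariant with simplified types; struct reloc and apply_reloc are illustrative, not the driver's types:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    struct reloc {
        uint64_t offset;            /* where in the source BO to patch   */
        uint64_t delta;             /* constant added to the target base */
        uint64_t presumed_offset;   /* address we believe the target has */
    };

    /* Patch one relocation into a mapped source BO and record the assumption. */
    static void apply_reloc(uint8_t *src_map, struct reloc *r, uint64_t target_offset)
    {
        uint64_t v = target_offset + r->delta;
        memcpy(src_map + r->offset, &v, sizeof(v));
        r->presumed_offset = target_offset;   /* now safe for I915_EXEC_NO_RELOC */
    }

    int main(void)
    {
        uint8_t bo[64] = { 0 };
        struct reloc r = { .offset = 8, .delta = 0x100, .presumed_offset = 0 };
        apply_reloc(bo, &r, 0x20000);

        uint64_t v;
        memcpy(&v, bo + 8, sizeof(v));
        assert(v == 0x20100 && r.presumed_offset == 0x20000);
        return 0;
    }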
+ */ + struct anv_batch_bo **bbo; + u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + anv_reloc_list_apply(cmd_buffer->device, + &(*bbo)->relocs, &(*bbo)->bo, false); + } + + for (uint32_t i = 0; i < exec->bo_count; i++) + exec->objects[i].offset = exec->bos[i]->offset; + + return true; +} + +VkResult +anv_cmd_buffer_execbuf(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) { struct anv_batch *batch = &cmd_buffer->batch; struct anv_block_pool *ss_pool = &cmd_buffer->device->surface_state_block_pool; - cmd_buffer->execbuf2.bo_count = 0; - cmd_buffer->execbuf2.need_reloc = false; + struct anv_execbuf execbuf; + anv_execbuf_init(&execbuf); - adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs, + cmd_buffer->last_ss_pool_center); + anv_execbuf_add_bo(&execbuf, &ss_pool->bo, &cmd_buffer->surface_relocs, + &cmd_buffer->pool->alloc); /* First, we walk over all of the bos we've seen and add them and their * relocations to the validate list. */ struct anv_batch_bo **bbo; u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, - &(*bbo)->last_ss_pool_bo_offset); + adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + cmd_buffer->last_ss_pool_center); - anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + anv_execbuf_add_bo(&execbuf, &(*bbo)->bo, &(*bbo)->relocs, + &cmd_buffer->pool->alloc); } + /* Now that we've adjusted all of the surface state relocations, we need to + * record the surface state pool center so future executions of the command + * buffer can adjust correctly. + */ + cmd_buffer->last_ss_pool_center = ss_pool->center_bo_offset; + struct anv_batch_bo *first_batch_bo = list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); @@ -1079,20 +1190,19 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) * corresponding to the first batch_bo in the chain with the last * element in the list. 
*/ - if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + if (first_batch_bo->bo.index != execbuf.bo_count - 1) { uint32_t idx = first_batch_bo->bo.index; - uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; + uint32_t last_idx = execbuf.bo_count - 1; - struct drm_i915_gem_exec_object2 tmp_obj = - cmd_buffer->execbuf2.objects[idx]; - assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + struct drm_i915_gem_exec_object2 tmp_obj = execbuf.objects[idx]; + assert(execbuf.bos[idx] == &first_batch_bo->bo); - cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; - cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; - cmd_buffer->execbuf2.bos[idx]->index = idx; + execbuf.objects[idx] = execbuf.objects[last_idx]; + execbuf.bos[idx] = execbuf.bos[last_idx]; + execbuf.bos[idx]->index = idx; - cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; - cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + execbuf.objects[last_idx] = tmp_obj; + execbuf.bos[last_idx] = &first_batch_bo->bo; first_batch_bo->bo.index = last_idx; } @@ -1113,9 +1223,9 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) } } - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, - .buffer_count = cmd_buffer->execbuf2.bo_count, + execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf.objects, + .buffer_count = execbuf.bo_count, .batch_start_offset = 0, .batch_len = batch->next - batch->start, .cliprects_ptr = 0, @@ -1128,6 +1238,49 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) .rsvd2 = 0, }; - if (!cmd_buffer->execbuf2.need_reloc) - cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; + if (relocate_cmd_buffer(cmd_buffer, &execbuf)) { + /* If we were able to successfully relocate everything, tell the kernel + * that it can skip doing relocations. The requirement for using + * NO_RELOC is: + * + * 1) The addresses written in the objects must match the corresponding + * reloc.presumed_offset which in turn must match the corresponding + * execobject.offset. + * + * 2) To avoid stalling, execobject.offset should match the current + * address of that object within the active context. + * + * In order to satisfy all of the invariants that make userspace + * relocations to be safe (see relocate_cmd_buffer()), we need to + * further ensure that the addresses we use match those used by the + * kernel for the most recent execbuf2. + * + * The kernel may still choose to do relocations anyway if something has + * moved in the GTT. In this case, the relocation list still needs to be + * valid. All relocations on the batch buffers are already valid and + * kept up-to-date. For surface state relocations, by applying the + * relocations in relocate_cmd_buffer, we ensured that the address in + * the RENDER_SURFACE_STATE matches presumed_offset, so it should be + * safe for the kernel to relocate them as needed. + */ + execbuf.execbuf.flags |= I915_EXEC_NO_RELOC; + } else { + /* In the case where we fall back to doing kernel relocations, we need + * to ensure that the relocation list is valid. All relocations on the + * batch buffers are already valid and kept up-to-date. Since surface + * states are shared between command buffers and we don't know what + * order they will be submitted to the kernel, we don't know what + * address is actually written in the surface state object at any given + * time. 
The only option is to set a bogus presumed offset and let the + * kernel relocate them. + */ + for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++) + cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1; + } + + VkResult result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos); + + anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc); + + return result; } diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 5361c4b..87f242c 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -44,8 +44,7 @@ lookup_blorp_shader(struct blorp_context *blorp, anv_shader_bin_unref(device, bin); *kernel_out = bin->kernel.offset; - *(const struct brw_stage_prog_data **)prog_data_out = - anv_shader_bin_get_prog_data(bin); + *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; return true; } @@ -54,7 +53,8 @@ static void upload_blorp_shader(struct blorp_context *blorp, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { struct anv_device *device = blorp->driver_ctx; @@ -78,8 +78,7 @@ upload_blorp_shader(struct blorp_context *blorp, anv_shader_bin_unref(device, bin); *kernel_out = bin->kernel.offset; - *(const struct brw_stage_prog_data **)prog_data_out = - anv_shader_bin_get_prog_data(bin); + *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; } void diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index a652f9a..7ff7dba 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -658,7 +658,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; const struct brw_stage_prog_data *prog_data = - anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]); + cmd_buffer->state.pipeline->shaders[stage]->prog_data; /* If we don't actually have any push constants, bail. 
*/ if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index c995630..e83887c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -203,19 +203,19 @@ static const VkExtensionProperties global_extensions[] = { #ifdef VK_USE_PLATFORM_XCB_KHR { .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 5, + .specVersion = 6, }, #endif #ifdef VK_USE_PLATFORM_XLIB_KHR { .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, - .specVersion = 5, + .specVersion = 6, }, #endif #ifdef VK_USE_PLATFORM_WAYLAND_KHR { .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 4, + .specVersion = 5, }, #endif }; @@ -223,7 +223,7 @@ static const VkExtensionProperties global_extensions[] = { static const VkExtensionProperties device_extensions[] = { { .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 67, + .specVersion = 68, }, }; @@ -350,7 +350,7 @@ VkResult anv_EnumeratePhysicalDevices( snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i); result = anv_physical_device_init(&instance->physicalDevice, instance, path); - if (result == VK_SUCCESS) + if (result != VK_ERROR_INCOMPATIBLE_DRIVER) break; } @@ -770,7 +770,7 @@ anv_device_submit_simple_batch(struct anv_device *device, { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo; + struct anv_bo bo, *exec_bos[1]; VkResult result = VK_SUCCESS; uint32_t size; int64_t timeout; @@ -786,6 +786,7 @@ anv_device_submit_simple_batch(struct anv_device *device, if (!device->info.has_llc) anv_clflush_range(bo.map, size); + exec_bos[0] = &bo; exec2_objects[0].handle = bo.gem_handle; exec2_objects[0].relocation_count = 0; exec2_objects[0].relocs_ptr = 0; @@ -809,18 +810,15 @@ anv_device_submit_simple_batch(struct anv_device *device, execbuf.rsvd1 = device->context_id; execbuf.rsvd2 = 0; - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + result = anv_device_execbuf(device, &execbuf, exec_bos); + if (result != VK_SUCCESS) goto fail; - } timeout = INT64_MAX; ret = anv_gem_wait(device, bo.gem_handle, &timeout); if (ret != 0) { /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m"); goto fail; } @@ -1070,6 +1068,24 @@ void anv_GetDeviceQueue( *pQueue = anv_queue_to_handle(&device->queue); } +VkResult +anv_device_execbuf(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf, + struct anv_bo **execbuf_bos) +{ + int ret = anv_gem_execbuffer(device, execbuf); + if (ret != 0) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m"); + } + + struct drm_i915_gem_exec_object2 *objects = (void *)execbuf->buffers_ptr; + for (uint32_t k = 0; k < execbuf->buffer_count; k++) + execbuf_bos[k]->offset = objects[k].offset; + + return VK_SUCCESS; +} + VkResult anv_QueueSubmit( VkQueue _queue, uint32_t submitCount, @@ -1079,7 +1095,34 @@ VkResult anv_QueueSubmit( ANV_FROM_HANDLE(anv_queue, queue, _queue); ANV_FROM_HANDLE(anv_fence, fence, _fence); struct anv_device *device = queue->device; - int ret; + VkResult result = VK_SUCCESS; + + /* We lock around QueueSubmit for three main reasons: + * + * 1) When a block pool is resized, we create a new gem handle with a + * different size and, in the case of surface states, possibly a + * different center offset but we re-use the same anv_bo struct when + * we do so. If this happens in the middle of setting up an execbuf, + * we could end up with our list of BOs out of sync with our list of + * gem handles. + * + * 2) The algorithm we use for building the list of unique buffers isn't + * thread-safe. While the client is supposed to syncronize around + * QueueSubmit, this would be extremely difficult to debug if it ever + * came up in the wild due to a broken app. It's better to play it + * safe and just lock around QueueSubmit. + * + * 3) The anv_cmd_buffer_execbuf function may perform relocations in + * userspace. Due to the fact that the surface state buffer is shared + * between batches, we can't afford to have that happen from multiple + * threads at the same time. Even though the user is supposed to + * ensure this doesn't happen, we play it safe as in (2) above. + * + * Since the only other things that ever take the device lock such as block + * pool resize only rarely happen, this will almost never be contended so + * taking a lock isn't really an expensive operation in this case. + */ + pthread_mutex_lock(&device->mutex); for (uint32_t i = 0; i < submitCount; i++) { for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { @@ -1087,28 +1130,23 @@ VkResult anv_QueueSubmit( pSubmits[i].pCommandBuffers[j]); assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); - if (ret != 0) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - - for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) - cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + result = anv_cmd_buffer_execbuf(device, cmd_buffer); + if (result != VK_SUCCESS) + goto out; } } if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) { - /* We don't know the real error. 
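anv_device_execbuf above copies the offsets the kernel chose back into each anv_bo after the ioctl; those cached offsets are what later allows relocate_cmd_buffer to pre-apply relocations in userspace instead of stalling in the kernel. A simplified model of that write-back, with illustrative types and names:

    #include <stdint.h>

    struct exec_object { uint64_t offset; };  /* filled in by the kernel after execbuffer2 */
    struct bo          { uint64_t offset; };  /* our cached copy of the GTT address        */

    /* After a successful execbuffer2 ioctl, remember where the kernel placed
     * every buffer so the next submit can apply relocations itself. */
    static void record_offsets(const struct exec_object *objects,
                               struct bo **bos, uint32_t count)
    {
        for (uint32_t i = 0; i < count; i++)
            bos[i]->offset = objects[i].offset;
    }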
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } + struct anv_bo *fence_bo = &fence->bo; + result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); + if (result != VK_SUCCESS) + goto out; } - return VK_SUCCESS; +out: + pthread_mutex_unlock(&device->mutex); + + return result; } VkResult anv_QueueWaitIdle( @@ -1138,15 +1176,11 @@ VkResult anv_DeviceWaitIdle( VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) { - bo->gem_handle = anv_gem_create(device, size); - if (!bo->gem_handle) + uint32_t gem_handle = anv_gem_create(device, size); + if (!gem_handle) return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - bo->map = NULL; - bo->index = 0; - bo->offset = 0; - bo->size = size; - bo->is_winsys_bo = false; + anv_bo_init(bo, gem_handle, size); return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 3e1cc3f..1c50e2b 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -49,16 +49,15 @@ VkResult anv_CreateDmaBufImageINTEL( if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); - if (!mem->bo.gem_handle) { + uint32_t gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!gem_handle) { result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); goto fail; } - mem->bo.map = NULL; - mem->bo.index = 0; - mem->bo.offset = 0; - mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + anv_bo_init(&mem->bo, gem_handle, size); anv_image_create(_device, &(struct anv_image_create_info) { diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 4817de1..4b8020a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -388,7 +388,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, const struct anv_pipeline_bind_map *bind_map) { if (cache) { @@ -399,7 +400,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, } else { return anv_shader_bin_create(pipeline->device, key_data, key_size, kernel_data, kernel_size, - prog_data, prog_data_size, bind_map); + prog_data, prog_data_size, + prog_data->param, bind_map); } } @@ -476,7 +478,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, - &prog_data, sizeof(prog_data), &map); + &prog_data.base.base, sizeof(prog_data), + &map); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -486,7 +489,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } const struct brw_vs_prog_data *vs_prog_data = - (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin); + (const struct brw_vs_prog_data *)bin->prog_data; if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { pipeline->vs_simd8 = bin->kernel.offset; @@ -563,7 +566,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, - &prog_data, sizeof(prog_data), &map); + &prog_data.base.base, sizeof(prog_data), + &map); if (!bin) { ralloc_free(mem_ctx); return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -686,7 +690,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, - &prog_data, sizeof(prog_data), &map); + &prog_data.base, sizeof(prog_data), + &map); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -758,7 +763,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, - &prog_data, sizeof(prog_data), &map); + &prog_data.base, sizeof(prog_data), + &map); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 79df315..ff6e651 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -26,13 +26,9 @@ #include "util/debug.h" #include "anv_private.h" -struct shader_bin_key { - uint32_t size; - uint8_t data[0]; -}; - static size_t -anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size, +anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params, + uint32_t key_size, uint32_t surface_count, uint32_t sampler_count) { const uint32_t binding_data_size = @@ -40,28 +36,21 @@ anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size, return align_u32(sizeof(struct anv_shader_bin), 8) + align_u32(prog_data_size, 8) + + align_u32(nr_params * sizeof(void *), 8) + align_u32(sizeof(uint32_t) + key_size, 8) + align_u32(binding_data_size, 8); } -static inline const struct shader_bin_key * -anv_shader_bin_get_key(const struct anv_shader_bin *shader) -{ - const void *data = shader; - data += align_u32(sizeof(struct anv_shader_bin), 8); - data += align_u32(shader->prog_data_size, 8); - return data; -} - struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, const void *prog_data_param, const struct anv_pipeline_bind_map *bind_map) { const size_t size = - anv_shader_bin_size(prog_data_size, key_size, + anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size, bind_map->surface_count, bind_map->sampler_count); struct anv_shader_bin *shader = @@ -82,10 +71,20 @@ anv_shader_bin_create(struct anv_device *device, void *data = shader; data += align_u32(sizeof(struct anv_shader_bin), 8); + shader->prog_data = data; + struct brw_stage_prog_data *new_prog_data = data; memcpy(data, prog_data, prog_data_size); data += align_u32(prog_data_size, 8); - struct shader_bin_key *key = data; + assert(prog_data->nr_pull_params == 0); + assert(prog_data->nr_image_params == 0); + new_prog_data->param = data; + uint32_t param_size = prog_data->nr_params * sizeof(void *); + memcpy(data, prog_data_param, param_size); + data += align_u32(param_size, 8); + + shader->key = data; + struct anv_shader_bin_key *key = data; key->size = key_size; memcpy(key->data, key_data, key_size); data += align_u32(sizeof(*key) + key_size, 8); @@ -115,7 +114,7 @@ static size_t anv_shader_bin_data_size(const struct anv_shader_bin *shader) { return anv_shader_bin_size(shader->prog_data_size, - anv_shader_bin_get_key(shader)->size, + shader->prog_data->nr_params, shader->key->size, shader->bind_map.surface_count, shader->bind_map.sampler_count) + align_u32(shader->kernel_size, 8); @@ -126,7 +125,7 @@ 
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) { size_t struct_size = anv_shader_bin_size(shader->prog_data_size, - anv_shader_bin_get_key(shader)->size, + shader->prog_data->nr_params, shader->key->size, shader->bind_map.surface_count, shader->bind_map.sampler_count); @@ -151,14 +150,14 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) static uint32_t shader_bin_key_hash_func(const void *void_key) { - const struct shader_bin_key *key = void_key; + const struct anv_shader_bin_key *key = void_key; return _mesa_hash_data(key->data, key->size); } static bool shader_bin_key_compare_func(const void *void_a, const void *void_b) { - const struct shader_bin_key *a = void_a, *b = void_b; + const struct anv_shader_bin_key *a = void_a, *b = void_b; if (a->size != b->size) return false; @@ -230,7 +229,7 @@ anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size) { uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; - struct shader_bin_key *key = (void *)vla; + struct anv_shader_bin_key *key = (void *)vla; key->size = key_size; memcpy(key->data, key_data, key_size); @@ -266,7 +265,9 @@ static struct anv_shader_bin * anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, + const void *prog_data_param, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *shader = @@ -277,11 +278,12 @@ anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, struct anv_shader_bin *bin = anv_shader_bin_create(cache->device, key_data, key_size, kernel_data, kernel_size, - prog_data, prog_data_size, bind_map); + prog_data, prog_data_size, prog_data_param, + bind_map); if (!bin) return NULL; - _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin); + _mesa_hash_table_insert(cache->cache, bin->key, bin); return bin; } @@ -290,7 +292,8 @@ struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, const struct anv_pipeline_bind_map *bind_map) { if (cache->cache) { @@ -299,7 +302,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, struct anv_shader_bin *bin = anv_pipeline_cache_add_shader(cache, key_data, key_size, kernel_data, kernel_size, - prog_data, prog_data_size, bind_map); + prog_data, prog_data_size, + prog_data->param, bind_map); pthread_mutex_unlock(&cache->mutex); @@ -311,7 +315,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, /* In this case, we're not caching it so the caller owns it entirely */ return anv_shader_bin_create(cache->device, key_data, key_size, kernel_data, kernel_size, - prog_data, prog_data_size, bind_map); + prog_data, prog_data_size, + prog_data->param, bind_map); } } @@ -366,10 +371,16 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, memcpy(&bin, p, sizeof(bin)); p += align_u32(sizeof(struct anv_shader_bin), 8); - const void *prog_data = p; + const struct brw_stage_prog_data *prog_data = p; p += align_u32(bin.prog_data_size, 8); + if (p > end) + break; + + uint32_t param_size = prog_data->nr_params * sizeof(void *); + const void 
*prog_data_param = p; + p += align_u32(param_size, 8); - struct shader_bin_key key; + struct anv_shader_bin_key key; if (p + sizeof(key) > end) break; memcpy(&key, p, sizeof(key)); @@ -392,7 +403,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, anv_pipeline_cache_add_shader(cache, key_data, key.size, kernel_data, bin.kernel_size, prog_data, bin.prog_data_size, - &bin.bind_map); + prog_data_param, &bin.bind_map); } } @@ -532,11 +543,11 @@ VkResult anv_MergePipelineCaches( struct hash_entry *entry; hash_table_foreach(src->cache, entry) { struct anv_shader_bin *bin = entry->data; - if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin))) + if (_mesa_hash_table_search(dst->cache, bin->key)) continue; anv_shader_bin_ref(bin); - _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin); + _mesa_hash_table_insert(dst->cache, bin->key, bin); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0e25827..31b4766 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -267,6 +267,17 @@ struct anv_bo { bool is_winsys_bo; }; +static inline void +anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size) +{ + bo->gem_handle = gem_handle; + bo->index = 0; + bo->offset = -1; + bo->size = size; + bo->map = NULL; + bo->is_winsys_bo = false; +} + /* Represents a lock-free linked list of "free" things. This is used by * both the block pool and the state pools. Unfortunately, in order to * solve the ABA problem, we can't use a single uint32_t head. @@ -439,9 +450,14 @@ VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size); void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); +struct anv_scratch_bo { + bool exists; + struct anv_bo bo; +}; + struct anv_scratch_pool { /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */ - struct anv_bo bos[16][MESA_SHADER_STAGES]; + struct anv_scratch_bo bos[16][MESA_SHADER_STAGES]; }; void anv_scratch_pool_init(struct anv_device *device, @@ -518,7 +534,8 @@ struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, const struct anv_pipeline_bind_map *bind_map); struct anv_device { @@ -567,6 +584,10 @@ void anv_device_get_cache_uuid(void *uuid); void anv_device_init_blorp(struct anv_device *device); void anv_device_finish_blorp(struct anv_device *device); +VkResult anv_device_execbuf(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf, + struct anv_bo **execbuf_bos); + void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); void anv_gem_munmap(void *p, uint64_t size); @@ -617,9 +638,6 @@ struct anv_batch_bo { /* Bytes actually consumed in this batch BO */ size_t length; - /* Last seen surface state block pool bo offset */ - uint32_t last_ss_pool_bo_offset; - struct anv_reloc_list relocs; }; @@ -1153,24 +1171,10 @@ struct anv_cmd_buffer { */ struct u_vector bt_blocks; uint32_t bt_next; - struct anv_reloc_list surface_relocs; - - /* Information needed for execbuf - * - * These fields are generated by anv_cmd_buffer_prepare_execbuf(). 
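The pipeline cache now keys its hash table directly on the size-prefixed anv_shader_bin_key stored inside each shader binary. A rough sketch of hashing and comparing such a key follows, using invented demo_* names and a plain FNV-1a loop standing in for _mesa_hash_data(); only the shape of the key handling is meant to match.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct demo_key {
   uint32_t size;
   uint8_t data[];
};

static uint32_t
demo_key_hash(const void *void_key)
{
   const struct demo_key *key = void_key;
   uint32_t h = 2166136261u;   /* FNV-1a offset basis */
   for (uint32_t i = 0; i < key->size; i++)
      h = (h ^ key->data[i]) * 16777619u;
   return h;
}

static bool
demo_key_equal(const void *void_a, const void *void_b)
{
   const struct demo_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;
   return memcmp(a->data, b->data, a->size) == 0;
}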
- */ - struct { - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 * objects; - uint32_t bo_count; - struct anv_bo ** bos; - - /* Allocated length of the 'objects' and 'bos' arrays */ - uint32_t array_length; - - bool need_reloc; - } execbuf2; + struct anv_reloc_list surface_relocs; + /** Last seen surface state block pool center bo offset */ + uint32_t last_ss_pool_center; /* Serial for tracking buffer completion */ uint32_t serial; @@ -1192,6 +1196,8 @@ void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_cmd_buffer *secondary); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); +VkResult anv_cmd_buffer_execbuf(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer); VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); @@ -1299,24 +1305,33 @@ struct anv_pipeline_bind_map { struct anv_pipeline_binding * sampler_to_descriptor; }; +struct anv_shader_bin_key { + uint32_t size; + uint8_t data[0]; +}; + struct anv_shader_bin { uint32_t ref_cnt; + const struct anv_shader_bin_key *key; + struct anv_state kernel; uint32_t kernel_size; - struct anv_pipeline_bind_map bind_map; - + const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; - /* Prog data follows, then the key, both aligned to 8-bytes */ + struct anv_pipeline_bind_map bind_map; + + /* Prog data follows, then params, then the key, all aligned to 8-bytes */ }; struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, const void *prog_data_param, const struct anv_pipeline_bind_map *bind_map); void @@ -1337,14 +1352,6 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) anv_shader_bin_destroy(device, shader); } -static inline const struct brw_stage_prog_data * -anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader) -{ - const void *data = shader; - data += align_u32(sizeof(struct anv_shader_bin), 8); - return data; -} - struct anv_pipeline { struct anv_device * device; struct anv_batch batch; @@ -1411,7 +1418,7 @@ get_##prefix##_prog_data(struct anv_pipeline *pipeline) \ { \ if (anv_pipeline_has_stage(pipeline, stage)) { \ return (const struct brw_##prefix##_prog_data *) \ - anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \ + pipeline->shaders[stage]->prog_data; \ } else { \ return NULL; \ } \ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 24e0012..2bc7e74 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -200,20 +200,9 @@ genX(EndCommandBuffer)( VkCommandBuffer commandBuffer) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_device *device = cmd_buffer->device; anv_cmd_buffer_end_batch_buffer(cmd_buffer); - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. 
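anv_shader_bin now keeps prog_data, the param array and the key in the same allocation, each region aligned to 8 bytes, with the struct holding plain pointers into that storage instead of recomputing offsets. The sketch below shows the general single-allocation layout under invented demo_* names; the param array is omitted for brevity and align_u32() is re-declared locally rather than taken from anv's headers.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static uint32_t
align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

struct demo_key {
   uint32_t size;
   uint8_t data[];
};

struct demo_bin {
   const void *prog_data;
   uint32_t prog_data_size;
   const struct demo_key *key;
   /* prog_data and the key live right after this struct in the same
    * allocation, each aligned to 8 bytes. */
};

static struct demo_bin *
demo_bin_create(const void *prog_data, uint32_t prog_data_size,
                const void *key_data, uint32_t key_size)
{
   const size_t size = align_u32(sizeof(struct demo_bin), 8) +
                       align_u32(prog_data_size, 8) +
                       align_u32(sizeof(struct demo_key) + key_size, 8);

   struct demo_bin *bin = calloc(1, size);
   if (!bin)
      return NULL;

   char *p = (char *)bin + align_u32(sizeof(struct demo_bin), 8);

   bin->prog_data = p;
   bin->prog_data_size = prog_data_size;
   memcpy(p, prog_data, prog_data_size);
   p += align_u32(prog_data_size, 8);

   struct demo_key *key = (struct demo_key *)p;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   bin->key = key;

   return bin;   /* the whole object is released with a single free() */
}

Keeping everything in one allocation keeps teardown to a single free and makes the cache's serialization a mostly contiguous copy.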
- */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - return VK_SUCCESS; } @@ -1883,22 +1872,25 @@ void genX(CmdEndRenderPass)( } static void -emit_ps_depth_count(struct anv_batch *batch, +emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, uint32_t offset) { - anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WritePSDepthCount; pc.DepthStallEnable = true; pc.Address = (struct anv_address) { bo, offset }; + + if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4) + pc.CommandStreamerStallEnable = true; } } static void -emit_query_availability(struct anv_batch *batch, +emit_query_availability(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, uint32_t offset) { - anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = (struct anv_address) { bo, offset }; @@ -1931,7 +1923,7 @@ void genX(CmdBeginQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + emit_ps_depth_count(cmd_buffer, &pool->bo, query * sizeof(struct anv_query_pool_slot)); break; @@ -1951,10 +1943,10 @@ void genX(CmdEndQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + emit_ps_depth_count(cmd_buffer, &pool->bo, query * sizeof(struct anv_query_pool_slot) + 8); - emit_query_availability(&cmd_buffer->batch, &pool->bo, + emit_query_availability(cmd_buffer, &pool->bo, query * sizeof(struct anv_query_pool_slot) + 16); break; @@ -1996,11 +1988,14 @@ void genX(CmdWriteTimestamp)( pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteTimestamp; pc.Address = (struct anv_address) { &pool->bo, offset }; + + if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4) + pc.CommandStreamerStallEnable = true; } break; } - emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); + emit_query_availability(cmd_buffer, &pool->bo, query + 16); } #if GEN_GEN > 7 || GEN_IS_HASWELL diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9484574..cd2cc76 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -52,7 +52,8 @@ static void brw_blorp_upload_shader(struct blorp_context *blorp, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { struct brw_context *brw = blorp->driver_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 921cc00..afb1057 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3673,6 +3673,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, */ setup_color_payload(bld, key, &sources[length], src0_alpha, 1); length++; + } else if (key->replicate_alpha && inst->target != 0) { + /* Handle the case when fragment shader doesn't write to draw buffer + * zero. No need to call setup_color_payload() for src0_alpha because + * alpha value will be undefined. 
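emit_ps_depth_count() and emit_query_availability() now take the command buffer rather than just the batch so they can consult the device info and set the extra command streamer stall that Gen9 GT4 needs. A heavily simplified illustration of that idea, with invented demo_* types standing in for the real PIPE_CONTROL packing:

#include <stdbool.h>

struct demo_device_info { int gen, gt; };

struct demo_pipe_control {
   bool depth_stall_enable;
   bool command_streamer_stall_enable;
   /* ...the real PIPE_CONTROL packet has many more fields... */
};

static void
demo_emit_ps_depth_count(const struct demo_device_info *info,
                         struct demo_pipe_control *pc)
{
   pc->depth_stall_enable = true;
   /* Mirrors the new GEN_GEN == 9 && gt == 4 check: this part needs a
    * command streamer stall together with the depth-count post-sync op. */
   if (info->gen == 9 && info->gt == 4)
      pc->command_streamer_stall_enable = true;
}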
+ */ + length++; } setup_color_payload(bld, key, &sources[length], color0, components); diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 136ac7b..8fd574e 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -291,12 +291,18 @@ _mesa_new_shader_program(GLuint name) * Clear (free) the shader program state that gets produced by linking. */ void -_mesa_clear_shader_program_data(struct gl_shader_program *shProg) +_mesa_clear_shader_program_data(struct gl_context *ctx, + struct gl_shader_program *shProg) { - unsigned i; + for (gl_shader_stage sh = 0; sh < MESA_SHADER_STAGES; sh++) { + if (shProg->_LinkedShaders[sh] != NULL) { + _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[sh]); + shProg->_LinkedShaders[sh] = NULL; + } + } if (shProg->UniformStorage) { - for (i = 0; i < shProg->NumUniformStorage; ++i) + for (unsigned i = 0; i < shProg->NumUniformStorage; ++i) _mesa_uniform_detach_all_driver_storage(&shProg->UniformStorage[i]); ralloc_free(shProg->UniformStorage); shProg->NumUniformStorage = 0; @@ -347,11 +353,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx, struct gl_shader_program *shProg) { GLuint i; - gl_shader_stage sh; assert(shProg->Type == GL_SHADER_PROGRAM_MESA); - _mesa_clear_shader_program_data(shProg); + _mesa_clear_shader_program_data(ctx, shProg); if (shProg->AttributeBindings) { string_to_uint_map_dtor(shProg->AttributeBindings); @@ -385,14 +390,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->TransformFeedback.VaryingNames = NULL; shProg->TransformFeedback.NumVarying = 0; - - for (sh = 0; sh < MESA_SHADER_STAGES; sh++) { - if (shProg->_LinkedShaders[sh] != NULL) { - _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[sh]); - shProg->_LinkedShaders[sh] = NULL; - } - } - free(shProg->Label); shProg->Label = NULL; } diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index 814a7f1..1249732 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -99,7 +99,8 @@ extern struct gl_shader_program * _mesa_new_shader_program(GLuint name); extern void -_mesa_clear_shader_program_data(struct gl_shader_program *shProg); +_mesa_clear_shader_program_data(struct gl_context *ctx, + struct gl_shader_program *shProg); extern void _mesa_free_shader_program_data(struct gl_context *ctx, diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index bd65df2..2a29d69 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -3052,7 +3052,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { unsigned int i; - _mesa_clear_shader_program_data(prog); + _mesa_clear_shader_program_data(ctx, prog); prog->LinkStatus = GL_TRUE; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f376462..c4c08db 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -772,9 +772,9 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int i = u_bit_scan(&writemask); - /* before emitting the instruction, see if we have to adjust store + /* before emitting the instruction, see if we have to adjust load / store * address */ - if (i > 1 && inst->op == TGSI_OPCODE_STORE && + if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) && addr.file == PROGRAM_UNDEFINED) { /* We have to advance the buffer address by 16 */ addr = get_temp(glsl_type::uint_type); @@ -782,7 +782,6 @@ 
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->src[0], st_src_reg_for_int(16)); } - /* first time use previous instruction */ if (dinst == NULL) { dinst = inst; @@ -802,11 +801,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; dinst->dst[j].index = initial_dst_idx[j]; if (i > 1) { - if (dinst->op == TGSI_OPCODE_STORE) { + if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE) dinst->src[0] = addr; - } else { + if (dinst->op != TGSI_OPCODE_STORE) dinst->dst[j].index++; - } } } else { /* if we aren't writing to a double, just get the bit of the initial writemask diff --git a/src/mesa/state_tracker/st_sampler_view.c b/src/mesa/state_tracker/st_sampler_view.c index 9fe0bfe..2b2fa8b 100644 --- a/src/mesa/state_tracker/st_sampler_view.c +++ b/src/mesa/state_tracker/st_sampler_view.c @@ -430,8 +430,12 @@ st_create_texture_sampler_view_from_stobj(struct st_context *st, templ.u.tex.first_level = stObj->base.MinLevel + stObj->base.BaseLevel; templ.u.tex.last_level = last_level(stObj); assert(templ.u.tex.first_level <= templ.u.tex.last_level); - templ.u.tex.first_layer = stObj->base.MinLayer; - templ.u.tex.last_layer = last_layer(stObj); + if (stObj->layer_override) { + templ.u.tex.first_layer = templ.u.tex.last_layer = stObj->layer_override; + } else { + templ.u.tex.first_layer = stObj->base.MinLayer; + templ.u.tex.last_layer = last_layer(stObj); + } assert(templ.u.tex.first_layer <= templ.u.tex.last_layer); templ.target = gl_target_to_pipe(stObj->base.Target); } @@ -478,8 +482,11 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, assert(stObj->base.MinLevel + stObj->base.BaseLevel == view->u.tex.first_level); assert(last_level(stObj) == view->u.tex.last_level); - assert(stObj->base.MinLayer == view->u.tex.first_layer); - assert(last_layer(stObj) == view->u.tex.last_layer); + assert(stObj->layer_override || stObj->base.MinLayer == view->u.tex.first_layer); + assert(stObj->layer_override || last_layer(stObj) == view->u.tex.last_layer); + assert(!stObj->layer_override || + (stObj->layer_override == view->u.tex.first_layer && + stObj->layer_override == view->u.tex.last_layer)); } } else { diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 730843a..0ce7989 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -108,6 +108,15 @@ struct st_texture_object */ enum pipe_format surface_format; + /* When non-zero, samplers should use this layer instead of the one + * specified by the GL state. + * + * This is used for VDPAU interop, where imported pipe_resources may be + * array textures (containing layers with different fields) even though the + * GL state describes one non-array texture per field. 
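The layer_override field described above lets the VDPAU interop path pin a sampler view to one layer of an imported array texture without hand-building the view template. A stripped-down sketch of how a view-filling helper might honor such an override, using invented demo_* structs rather than the real st_texture_object / pipe_sampler_view types:

struct demo_view_templ { unsigned first_layer, last_layer; };

struct demo_tex_obj {
   unsigned min_layer;       /* MinLayer from the GL state */
   unsigned max_layer;       /* last layer implied by the GL state */
   unsigned layer_override;  /* non-zero: pin the view to this layer */
};

static void
demo_fill_view_layers(const struct demo_tex_obj *obj,
                      struct demo_view_templ *templ)
{
   if (obj->layer_override) {
      /* The imported resource is an array texture; sample only the field
       * selected when the surface was mapped. */
      templ->first_layer = templ->last_layer = obj->layer_override;
   } else {
      templ->first_layer = obj->min_layer;
      templ->last_layer = obj->max_layer;
   }
}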
+ */ + uint layer_override; + /** The glsl version of the shader seen during the previous validation */ unsigned prev_glsl_version; /** The value of the sampler's sRGBDecode state at the previous validation */ diff --git a/src/mesa/state_tracker/st_vdpau.c b/src/mesa/state_tracker/st_vdpau.c index 7912057..0273815 100644 --- a/src/mesa/state_tracker/st_vdpau.c +++ b/src/mesa/state_tracker/st_vdpau.c @@ -189,8 +189,8 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, struct st_texture_image *stImage = st_texture_image(texImage); struct pipe_resource *res; - struct pipe_sampler_view templ, **sampler_view; mesa_format texFormat; + uint layer_override = 0; if (output) { res = st_vdpau_output_surface_dma_buf(ctx, vdpSurface); @@ -201,8 +201,10 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, } else { res = st_vdpau_video_surface_dma_buf(ctx, vdpSurface, index); - if (!res) + if (!res) { res = st_vdpau_video_surface_gallium(ctx, vdpSurface, index); + layer_override = index & 1; + } } if (!res) { @@ -233,18 +235,8 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, st_texture_release_all_sampler_views(st, stObj); pipe_resource_reference(&stImage->pt, res); - u_sampler_view_default_template(&templ, res, res->format); - templ.u.tex.first_layer = index & 1; - templ.u.tex.last_layer = index & 1; - templ.swizzle_r = GET_SWZ(stObj->base._Swizzle, 0); - templ.swizzle_g = GET_SWZ(stObj->base._Swizzle, 1); - templ.swizzle_b = GET_SWZ(stObj->base._Swizzle, 2); - templ.swizzle_a = GET_SWZ(stObj->base._Swizzle, 3); - - sampler_view = st_texture_get_sampler_view(st, stObj); - *sampler_view = st->pipe->create_sampler_view(st->pipe, res, &templ); - stObj->surface_format = res->format; + stObj->layer_override = layer_override; _mesa_dirty_texobj(ctx, texObj); pipe_resource_reference(&res, NULL); @@ -264,6 +256,8 @@ st_vdpau_unmap_surface(struct gl_context *ctx, GLenum target, GLenum access, st_texture_release_all_sampler_views(st, stObj); pipe_resource_reference(&stImage->pt, NULL); + stObj->layer_override = 0; + _mesa_dirty_texobj(ctx, texObj); st_flush(st, NULL, 0); diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 0128e1c..2280651 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -117,6 +117,8 @@ wsi_x11_get_connection(struct wsi_device *wsi_dev, struct wsi_x11_connection *wsi_conn = wsi_x11_connection_create(alloc, conn); + if (!wsi_conn) + return NULL; pthread_mutex_lock(&wsi->mutex); @@ -889,6 +891,10 @@ wsi_x11_finish_wsi(struct wsi_device *wsi_device, (struct wsi_x11 *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_XCB]; if (wsi) { + struct hash_entry *entry; + hash_table_foreach(wsi->connections, entry) + wsi_x11_connection_destroy(alloc, entry->data); + _mesa_hash_table_destroy(wsi->connections, NULL); pthread_mutex_destroy(&wsi->mutex); |
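Finally, the X11 WSI teardown now destroys every cached connection before destroying the table that owned them, and only then the mutex. A toy illustration of that teardown order, with an invented fixed-size cache standing in for the real _mesa_hash_table:

#include <pthread.h>
#include <stdlib.h>

struct demo_connection { int fd; };

struct demo_wsi {
   struct demo_connection *connections[16];   /* toy stand-in for the hash table */
   unsigned connection_count;
   pthread_mutex_t mutex;
};

static void
demo_wsi_finish(struct demo_wsi *wsi)
{
   /* Destroy the cached wrappers first (wsi_x11_connection_destroy() in the
    * real code), then empty the container, then the lock that guarded it. */
   for (unsigned i = 0; i < wsi->connection_count; i++)
      free(wsi->connections[i]);
   wsi->connection_count = 0;
   pthread_mutex_destroy(&wsi->mutex);
}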